---
title: "The Diffusion of Exclusion: Medieval Expulsions of Jews"
author: "Kerice Doten-Snitker"
date: "`r Sys.Date()`"
output: html_document
---

```{r setup, echo = FALSE, cache = FALSE}
opts_chunk$set(dev = c('png', 'pdf'), 
        fig.align = 'center', fig.height = 5, fig.width = 10, 
        fig.path=here::here("Diffusion", "out_figures/")) 

# a file with some helper functions for dealing with the output:
source(here::here("functions","broomify-rstanarm.r"))
```

```{r load_pander_methods, include= FALSE}
require(pander)
panderOptions('round', 2)
panderOptions('digits',7)
panderOptions('keep.trailing.zeros', TRUE)
panderOptions('table.style', 'multiline')
#panderOptions('table.split.table', Inf)
```
```{r load_ggplot_options, echo=FALSE}
mytheme <- theme_bw() + 
  theme(legend.position="right", legend.direction="vertical", 
        legend.background = element_rect(fill="transparent", colour=NA),
        legend.key = element_rect(fill="transparent", colour=NA),
        legend.key.size=unit(.33,"cm"),
        legend.title=element_text(size=12),
        legend.text=element_text(size=12),
        panel.grid.minor = element_blank(), 
        panel.grid.major = element_blank(),
        panel.background = element_blank(),
        panel.border = element_rect(color = "black"),
        text=element_text(family="serif"),
        plot.title=element_text(size=24),
        plot.subtitle=element_text(size=16),
        axis.title=element_text(size=16,face="bold"),
        axis.text=element_text(size=16,face="bold"))

```

Data is arranged by settlement-year, restricted to observations with certain Jewish presence AND no nicht-städtische settlements.  

## Exploration and descriptives

#### How many cases am I dealing with?
```{r basics}
nrow(model_data) # n(settlement-period obs)
length(unique(model_data$PlaceName)) # n(names)
length(unique(model_data$PlaceID)) # n(PlaceIDs)
length(unique(model_data$ObsID)) # n(unique IDs)
table(model_data$Vrtrb) # count of x=1

```

#### What are the temporal trends in settlement and expulsion?

```{r Figs2_histograms, eval=TRUE, echo=FALSE, fig.height = 2.5, fig.width = 6}

Expl.years <- model_data_all %>%
  dplyr::select(Year, Vrtrb) %>%
  group_by(Year) %>% 
  summarize(Setts_n = n(), # total settlements in a year
    Expl_sum = sum(Vrtrb), # total expelling cities in a year
    Expl_pct = sum(Vrtrb)/n()) # pct expelling cities in a year

# Expl.years.long <- Expl.years %>%
#   gather(key="key", value="value", -Year)

# (Figure 1 is map)
# Figure 2a ----
# histogram of settlement by year
p.setts <- ggplot(data=Expl.years, aes(x=Year, y=Setts_n)) + 
  geom_line(linewidth=1) +
  scale_y_continuous(breaks=seq(0,450,50)) +
  labs(title="", 
    x="", y="Total cities") +
  mytheme +
  theme(text=element_text(family="serif"),
        plot.subtitle=element_text(size=14),
        axis.text=element_text(size=14,face="bold"),
        axis.title=element_text(size=14,face="bold"))

plot(p.setts)

# Figure 2b ----
# plot of expulsions by year - counts
p.expl <- ggplot(data=Expl.years, aes(x=Year, y=Expl_sum)) + 
  geom_col() +
  scale_x_continuous(breaks=seq(1000,1520,100)) +
  labs(title="", 
    x="Year", y="Total expulsions") +
  mytheme +
  theme(text=element_text(family="serif"),
        plot.subtitle=element_text(size=14),
        axis.text=element_text(size=14,face="bold"),
        axis.title=element_text(size=14,face="bold"))
  
plot(p.expl)

# Figure 2c ----
# arrange the two plots in a single column
# p.compare_expl <- cowplot::plot_grid(p.setts, p.expl,
#                   ncol = 1,
#                   rel_heights = c(2,1),
#                   align = 'hv',
#                   labels = c("", ""),
#                   hjust = -1
#                   )
# 
# plot(p.compare_expl)

# # plot of expulsions by year - proportions
# p.expl_pct <- ggplot(data=Expl.years, aes(x=Year, y=Expl_pct)) + 
#   geom_col() +
#   scale_x_continuous(breaks=seq(1000,1520,100)) +
#   labs(title="Annualized expulsions as a proportion of cities with Jewish residents", 
#     x="Period", y="Proportion of cities") +
#   mytheme
#   
# plot(p.expl_pct)


```
```{r Figs3_histograms_lags, eval=FALSE, echo=FALSE, fig.height = 3, fig.width = 6}
# grab the already-prepped file
Expl.matrix <- readRDS(here::here("Diffusion","out_files", "Expl_matrix.rds"))

# Figure  3 ----
# plot of 10 year lag of expulsions by year, from Wenceslaus's first Jewish debt and tax confiscation in 1385
p.expl_10yr <- ggplot(data=Expl.matrix[Expl.matrix$Year>=1385,], 
  aes(x=Year, y=Expl_sum10)) + 
  geom_col(color="black", fill=NA) +
  scale_x_continuous(breaks=seq(1375,1520,25)) +
  labs(title="", 
    x="Year", y="Lagged Expulsions") +
  mytheme +
  theme(text=element_text(family="serif"),
        plot.subtitle=element_text(size=14),
        axis.text=element_text(size=14,face="bold"),
        axis.title=element_text(size=14,face="bold"))

plot(p.expl_10yr)

# plot of 5 year lag of expulsions by year
p.expl_5yr <- ggplot(data=Expl.matrix[Expl.matrix$Year>1385,], 
  aes(x=Year, y=Expl_sum5)) + 
  geom_col(color="black", fill=NA) +
  #geom_smooth(method=loess) +
  scale_x_continuous(breaks=seq(1375,1520,25)) +
  labs(title="", 
    x="Year", y="Lagged Expulsions") +
  mytheme  +
  theme(text=element_text(family="serif"),
        plot.subtitle=element_text(size=14),
        axis.text=element_text(size=14,face="bold"),
        axis.title=element_text(size=14,face="bold"))

plot(p.expl_5yr)

# plot of 20 year lag of expulsions by year
p.expl_20yr <- ggplot(data=Expl.matrix[Expl.matrix$Year>1350,], 
  aes(x=Year, y=Expl_sum20)) + 
  geom_col(color="black", fill=NA) +
  #geom_smooth(method=loess) +
  scale_x_continuous(breaks=seq(1375,1520,25)) +
  labs(title="", 
    x="Year", y="Lagged Expulsions") +
  mytheme  +
  theme(text=element_text(family="serif"),
        plot.subtitle=element_text(size=14),
        axis.text=element_text(size=14,face="bold"),
        axis.title=element_text(size=14,face="bold"))

plot(p.expl_20yr)
```

#### What are the trends by dominion types? 

First, a figure shows histograms of dominion types in the study period (need to add this). Second, figures tracks expulsions in cities with each dominion type, annually and with a 10 year lag.

```{r histograms_dom, eval=TRUE, echo=FALSE, fig.height = 6, fig.width = 6}

Expl.matrix.dom <- readRDS(here::here("Diffusion","out_files", "Expl_matrix_dom.rds"))

## By dominion type
Expl.matrix.dom.tidy <- Expl.matrix.dom %>% 
  dplyr::select(c(1,2:3,5,10,13:14,16,21,24:25,27,32,35:36,38,43,46:47,49,54)) %>%
  gather(key, Expl_sum, -Year) %>%
  tidyr::extract(key, c("Span","DomType"), "Expl_(.+)_(.+)")

# consider limiting to the few categories I will use later

# plot of expulsions by year, by dominion type
p.dom.ann <- ggplot(data=Expl.matrix.dom.tidy[Expl.matrix.dom.tidy$Span=="ann"&Expl.matrix.dom.tidy$Year>1375,],
  aes(x=Year, y=Expl_sum)) + 
  geom_col() +
  facet_wrap(~ DomType, ncol=1) +
  scale_x_continuous(breaks=seq(1375,1520,30)) +
  labs(title="Total expulsions by year by dominion type", 
    x="Year", y="Expulsions") +
  mytheme +
  theme(text=element_text(family="serif"),
        plot.subtitle=element_text(size=14),
        axis.text=element_text(size=14,face="bold"),
        axis.title=element_text(size=14,face="bold"))

plot(p.dom.ann)

# plot of 10 year lag of expulsions by year, by dominion type
p.dom.sum10 <- ggplot(data=Expl.matrix.dom.tidy[Expl.matrix.dom.tidy$Span=="sum10"&Expl.matrix.dom.tidy$Year>1375,], 
  aes(x=Year, y=Expl_sum)) + 
  geom_line() +
  facet_wrap(~ DomType, ncol=1) +
  labs(title="Total expulsions in prior 10 years by dominion type", 
    x="Year", y="Expulsions") +
  scale_y_continuous(breaks=c(1,3,5,7)) + 
  scale_x_continuous(breaks=seq(1375,1520,25)) +
  mytheme +
  theme(text=element_text(family="serif"),
        plot.subtitle=element_text(size=14),
        axis.text=element_text(size=14,face="bold"),
        axis.title=element_text(size=14,face="bold"))

plot(p.dom.sum10)

# ggplot(data=Expl.matrix.dom.tidy[Expl.matrix.dom.tidy$Span=="sum5"&Expl.matrix.dom.tidy$Year>1350,], 
#   aes(x=Year, y=Expl_sum)) + 
#   geom_line() +
#   facet_wrap(~ DomType) +
#   labs(title="Total expulsions in prior 5 years by dominion type", 
#     x="Year", y="Expulsions") +
#   mytheme

```

#### What are the trends by political variables? 


```{r histograms_pol, eval=TRUE, echo=TRUE}
mydata_bunde <- readRDS(here::here("in_data", "Distler_Staedtebunde_prepped.rds"))
# By Bund membership
table(model_data$Bund, model_data$Vrtrb) # 0 cities were Bund members when they expelled
table(model_data$BundTotPrev, model_data$Vrtrb) # 41 expulsions were in a city previously party to 10 Bund agreements (same city? see below)
length(unique(model_data$PlaceID[model_data$Vrtrb==1&model_data$BundTotPrev>0]))
# expulsions in 28 cities that were members of alliances
sum(unique(mydata_bunde$PlaceID) %in% unique(model_data$PlaceID))
# 76 cities that participated in Bünde are in the model data
length(unique(model_data$PlaceID[model_data$Bund>0])) 
# 32 cities participated in Bünde during the study period

# linear model isn't much help, tiny coefficients
# summary(lm(Vrtrb~BundTotPrev, data=model_data))

# Table of cities that participated in Bünde
mydata_bunde %>%
  filter(PlaceID %in% unique(model_data$PlaceID)) %>%
  group_by(PlaceID, PlaceName) %>%
  summarize(minYear=min(Year), maxYear=max(Year), totBund=n())

```

About 40% of expulsions were in places party to prior Bund treaties. They occurred in 28 of the 76 cities in the sample that participated in Bünde. That's a pretty high ratio!

#### What are the trends by network variables? 


```{r histograms_roads, eval=TRUE, echo=TRUE, fig.height = 6, fig.width = 6}

# By route connectivity
table(model_data$RouteCount, model_data$Vrtrb) # 
round(prop.table(table(model_data$RouteCount, model_data$Vrtrb), margin=2),3)
summary(aov(model_data$RouteCount~as.factor(model_data$Vrtrb)))

# what about when grouped by place (since RouteCount is constant)?
Expl.routes <- model_data %>%
  dplyr::select(PlaceID, Vrtrb, RouteCount, dist_Constance) %>%
  group_by(PlaceID, RouteCount, dist_Constance) %>% 
  summarize(Expl_sum = sum(Vrtrb), # total expulsions in a place
    Expl_bin = ifelse(sum(Vrtrb)>0,1,0)) # binary, if ever

# Repeat, summarized by city
table(Expl.routes$RouteCount, Expl.routes$Expl_bin) # 
round(prop.table(table(Expl.routes$RouteCount, Expl.routes$Expl_bin), margin=2),3)
summary(aov(Expl.routes$RouteCount~as.factor(Expl.routes$Expl_bin)))

# Do highly-connected cities expel after exposure?

ggplot(data=model_data, 
  aes(y=RouteCount, x=Expl_sum10)) + 
  geom_jitter() +
  facet_wrap(~ Vrtrb, ncol=1) +
  labs(title="Are high-exposure cities more likely to expel?", 
    y="Expulsions in last 10 years", x="Count of routes") +
  mytheme +
  theme(text=element_text(family="serif"),
        plot.subtitle=element_text(size=14),
        axis.text=element_text(size=14,face="bold"),
        axis.title=element_text(size=14,face="bold"))

ggplot(data=model_data, 
  aes(x=RouteCount, y=Expl_sum10)) + 
  geom_smooth(method=lm) +
  facet_wrap(~ Vrtrb, ncol=1) +
  labs(title="Are high-exposure cities more likely to expel?", 
    x="Expulsions in last 10 years", y="Count of routes") +
  mytheme +
  theme(text=element_text(family="serif"),
        plot.subtitle=element_text(size=14),
        axis.text=element_text(size=14,face="bold"),
        axis.title=element_text(size=14,face="bold"))

# Or do expulsions in highly-connected cities have higher impact?

Expl.years.routes <- model_data %>%
  dplyr::select(Year, Vrtrb, RouteCount) %>%
  mutate(routes=ifelse(RouteCount>2,"high","low")) %>%
  group_by(Year, routes) %>% 
  summarize(Setts_n = n(), # total settlements in a year
    Expl_sum = sum(Vrtrb), 
    Expl_pct = sum(Vrtrb)/n()) 

# plot of expulsions by year - counts - identified by connections
ggplot(data=Expl.years.routes, aes(x=Year, y=Expl_sum, fill=factor(routes, levels=c("low", "high")))) + 
  geom_col() +
  scale_x_continuous(breaks=seq(1375,1520,20)) +
  scale_fill_manual(values=c("gray","red"), 
                    name = "Route connections", labels = c("low", "high")) +
  labs(title="Do expulsions in highly-connected cities have outsize impact?", 
    x="Year", y="Total expulsions") +
  mytheme +
  theme(text=element_text(family="serif"),
        plot.subtitle=element_text(size=14),
        axis.text=element_text(size=14,face="bold"),
        axis.title=element_text(size=14,face="bold"))
```
  
  The distribution of road connections among expelling vs non-expelling city-year observations is similar. When aggregated at the city level (since the number of routes is a constant), things are still pretty comparable. However, running an ANOVA does say there is significant (p<0.01) difference between the two. What's happening is differences at the tail, especially among expelling cities, where having 1-2 cities with a high count of routes is a much higher proportion of all expelling places than is 1-2 high-route cities than did not expel. So while the difference is technically significant, I'm skeptical that it is substantively significant. For example, among all places on >4 routes (n=44), about 13% (n=6) ever expelled. That's a bit high, but not outlandishly high, given two-way comparisons for other variables. In sum, for the count of routes, the expulsions distribution has a fatter tail and lower peak than the non-expulsions distribution.
  
  Just looking at the pattern of expulsions through time, it does not appear to be the case that expulsions in highly connected cities set off expulsions elsewhere. There is not a jump in expulsions after one in a connected city, or at least not that is distinguishable from a simultaneous(-ish) expulsion in a less-connected city. But is there a temporal pattern? Are expulsions after one in a connected city close to that connected city?
  
  
```{r pointpattern_routes, eval=TRUE, echo=FALSE}

# point pattern analysis for expulsions in highly-connect cities vs less-connected
# variables: RouteCount, Vrtrb, Year, XCOORD, YCOORD
# using a look-up table of expulsions, check for expulsions in next 10 years, and then calculate # and distances
# split by routes (high, low), and compare

# make a smaller dataframe of just expulsions that is easier to work with
# kept useful/interesting/model vars, in case this I make a map later
routes_pp <- mydata_geo %>%
  filter(Year>=1385, Vrtrb==1) %>%
  dplyr::select("Year", "PlaceID", "ObsID", "PlaceName", "XCOORD", "YCOORD",
         "Vrtrb","King", "Prince", "Lord", "minor", "imperial", "free", "city",
         "RelFund", "RelHouse", "Archbishop", "Bishop", "Diocese",
         "BundTotPrev", "AuthTotal", "Schoeffen", "Castellan", "Schlthss", 
         "VfgTotal", "VrtTotPrev", 
         "RouteCount", "J_dev_sum", "Mkt2", "Frgn", "Mint")

# collect the expulsions by year, to be matched by year to each expulsion
years_nested <- routes_pp %>%
  group_by(Year) %>%
  nest() %>%
  ungroup()

routes_pp <- routes_pp %>%
# split on RouteCount (2 is 3rd qt)  
  mutate(connection = ifelse(RouteCount>2, 1, 0),
         next10 = map(Year,  # make a list-column of nested subsequent
                .f = function(x)  # expulsions, and unnest by years
                  filter(years_nested, Year>x&Year<=(x+10)) %>% 
                  unnest(cols = c(data)))) %>%
  mutate(next10 = map(next10, bind_rows)) %>% # make dfs, not lists
  mutate(next10_n = map_dbl(next10, ~dim(.x)[[1]])) %>% # get counts of next years
  mutate(next10_xcoords = map(next10, "XCOORD"),
         next10_ycoords = map(next10, "YCOORD"))
# gave up on getting xcoord and y coords in the same list-column
# Next: getting the distances between an expelling place and all expulsions  
# in the next 10 years. Combine into one df, get euclidean distances,
# drop the first (distance of the expeller to itself) - still in meters!
routes_pp <- routes_pp %>%
  mutate(next10_coords =
           map2(next10_xcoords, next10_ycoords,
               .f= function(x,y)
                   cbind(XCOORD=unlist(x),YCOORD=unlist(y)))) %>%
  mutate(next10_dist = vector("list", dim(.)[1]))

# don't want to spend any more hours trying get this to run via map2(), so just do it in a loop
# combine the expulsion's coordinates with the successive expulsion coordinates,
# then get euclidean distances using dist()
for (i in 1:dim(routes_pp)[1]) {
   dist(rbind(c(routes_pp$XCOORD[i],routes_pp$YCOORD[i]), 
       routes_pp$next10_coords[i][[1]]), method="euclidean") %>% as.matrix() %>% 
   as.data.frame() %>% .[2:nrow(.),1] -> routes_pp$next10_dist[i][[1]]
}
# dist() creates a "dist" object, a modified symmetrical matrix, 
# so convert to a dataframe from which just the first 
# column (dist to all others) can be saved, excluding dist. to self
# (in the first row of first colum)

# this loop is adding data: when there are no successive expulsions,
# the stored list is c(NA, 0). Strip those 0's out later.

# make one tidy df with a row for each distance
# the distances are in a list-column with unnamed elements, which requires
# some extra steps, like converting to df and then adding a name,
# because unnest() requires a df, not a list
routes_tidy <- routes_pp %>%
  dplyr::select(Year, PlaceID, ObsID, PlaceName, RouteCount, connection, next10_n, next10_dist) %>%
  mutate(next10_dist = map(next10_dist, ~as.data.frame(.) %>% set_names(c("nextdist")))) %>%
  unnest(next10_dist, keep_empty = TRUE) %>% # if none, preserve the row
  mutate(nextdist_km=nextdist/1000) # convert to km

# strip out rows I accidentally introduced
# keep where no subsequent and dist is correctly NA, or where
# there are subsequent expulsions
routes_tidy <- subset(routes_tidy, (next10_n==0&is.na(nextdist_km))|(next10_n>0)) 

# Frequency polygons for distance(s) to subsequent expulsions for highly vs less connected cities
ggplot(data=routes_tidy, aes(x=nextdist_km,
        color=factor(connection,levels=c(0,1),labels=c("low", "high")))) + 
  geom_freqpoly(binwidth=50) +
  labs(title="Distance to successive expulsions", 
    x="Distance (km)", y="Total expulsions") +
  mytheme +
  theme(text=element_text(family="serif"),
        plot.subtitle=element_text(size=14),
        axis.text=element_text(size=14,face="bold"),
        axis.title=element_text(size=14,face="bold"))
  
# Boxplots for distance(s) to subsequent expulsions for highly vs less connected cities
ggplot(data=routes_tidy, aes(x=nextdist_km, y=factor(connection,levels=c(0,1),labels=c("low", "high")),
        fill=factor(connection,levels=c(0,1),labels=c("low", "high")))) + 
  geom_boxplot(show.legend=FALSE) +
  scale_fill_discrete(labels = c("low", "high")) +
  labs(title="Distance to successive expulsions", 
    x="Distance (km)", y="Connectedness of expelling city") +
  mytheme +
  theme(text=element_text(family="serif"),
        plot.subtitle=element_text(size=14),
        axis.text=element_text(size=14,face="bold"),
        axis.title=element_text(size=14,face="bold"))

# use routes_pp which has a row for each expulsion
# instead of routes_tidy which has a row for each subsequent expulsion
ggplot(data=routes_pp, aes(x=next10_n, 
        color=factor(connection,levels=c(0,1),labels=c("low", "high")))) + 
  geom_freqpoly(binwidth=1) +
  labs(title="Subsequent expulsions", 
    x="Subsequent expulsions", y="Frequency") +
  mytheme +
  theme(text=element_text(family="serif"),
        plot.subtitle=element_text(size=14),
        axis.text=element_text(size=14,face="bold"),
        axis.title=element_text(size=14,face="bold"))

# comparing distributions: does it make sense to calculate the RMSE for
# each expulsion, using distances to successive expulsions?
  

```
  
  Comparing the distributions of distances to subsequent expulsions, highly connected cities (routes>3rd quartile) do not spur close expulsions. There is no real visual difference. The distribution for expulsions in highly connected cities is, if anything, farther away than for less-connected cities.
  
  I really don't think it makes sense to pull in network measures from a different sample of cities. But, I can try the data from Becker et al. (2020), even though it's a slightly later time period.
    

```{r netposition, eval=TRUE, echo=FALSE}
city_net <- read_dta(here::here("Witchtrial_diffusion", "BPR_data.dta"))

model_data_net <- mydata_geo %>%
  dplyr::select(PlaceID,PlaceName) %>% # just necessary values
  filter(PlaceID %in% model_data$PlaceID) %>% # only cities in final sample
  distinct() %>% # unique rows, in order to merge one-to-many
  merge(model_data, by="PlaceID") %>% # add PlaceNames
  merge(city_net, by.x="PlaceName", by.y="city", # add network variables
        all.x=TRUE, all.y=FALSE) # don't add rows for places not in the sample

# For how many city-year obs are there matching network measures?
sum(is.na(model_data_net$degree))/nrow(model_data_net) # 80% missing
# For how many expulsions are there matching network measures?
summary(model_data_net$degree[model_data_net$Vrtrb==1]) #44 NA's - nearly half
# How do the two samples compare?
# model_data repeats cities; make df with only one row for each
model_data_net_sum <- model_data_net %>% dplyr::select(PlaceID, PlaceName, RouteCount, degree, closeness, betweenness, eigenvector) %>% distinct
# how many cities did not match?
sum(is.na(model_data_net_sum$degree))/nrow(model_data_net_sum) # 83% missing
# Are the network measures correlated with what I am already using?
cor(model_data_net_sum[c("RouteCount","degree", "closeness", "betweenness", "eigenvector")],
    use="complete.obs", method="pearson") 
# not strong, but good for degree and betweenness
# Compare specific measures
summary(city_net$degree)
summary(model_data_net_sum$degree) # same, except max is lower
# summary(city_net$closeness) # all 0
# summary(model_data_net_sum$closeness) # all 0
summary(city_net$betweenness)
summary(model_data_net_sum$betweenness) # central weight is much higher
summary(city_net$eigenvector)
summary(model_data_net_sum$eigenvector) # not helpful!

# Given what I have collected about routes, what kinds of places
# can I match vs not?
table(model_data_net_sum$RouteCount,is.na(model_data_net_sum$degree))
round(prop.table(table(model_data_net_sum$RouteCount,is.na(model_data_net_sum$degree)), margin=2),3) # missing for a *lot* of 0-route obs

```
    
  I think my fears are well-founded. The portion of my sample for which I can match network variables is not an unbiased sample of the overall city network. Degree is basically the same, though the overall network max is higher. (Closeness is effectively 0 for both) Betweenness is a little more comparable, but surprisingly the bulk of my sample is above the distribution of the main data (which is skewed higher). As far as eigenvector centrality, the 3rd quartile for the matches in my sample is relatively close to the median of the whole network, and the max shows right-censoring. Comparing RouteCount for those matched vs not, I see that there are a *lot* of missing matches for 0-route cities. So while some values are comparable (maybe degree? maybe betweenness?), overall the matched cities are more likely to be more connected (but distributions are the same from 4 routes and above).

```{r bund_net, eval=FALSE, echo=FALSE}
# First, data exploration! Does it make sense to do network analysis of expulsions among Bund-signing cities?

mydata_bunde %>% dplyr::select(PlaceID) %>% 
  distinct %>% nrow # 78 cities ever signed
mydata_bunde %>% filter(Year>1384) %>% dplyr::select(PlaceID) %>% 
  distinct %>% nrow # 41 cities signed after 1384
mydata_bunde %>% dplyr::select(PlaceID) %>% 
  filter(PlaceID %in% model_data$PlaceID) %>% distinct %>% nrow 
# 76 cities in the model data
mydata_bunde %>% filter(Year>1384) %>% dplyr::select(PlaceID) %>% filter(PlaceID %in% model_data$PlaceID) %>% distinct %>% nrow #all 41 that signed after 1384 are in the final sample


# how many bund agreements made in and around the time of my sample?
bunde <- mydata_bunde %>%
  filter(Year>1350) %>%
  dplyr::select(Year, PlaceID, Städtebund, Participants) %>%
  group_by(Year, Städtebund, Participants) %>%
  summarize(in_data = n())

table_bunde <- mydata_bunde %>%
  filter(Year>1350) %>%
  dplyr::select(PlaceID, Städtebund) %>%
  distinct %>%
  group_by(Städtebund) %>%
  summarize(cities = n())

table_bunde <- bunde %>%
  group_by(Städtebund) %>%
  summarize(treaties = n(), first = min(Year), last = max(Year)) %>%
  merge(table_bunde, by = "Städtebund")

# get expulsions by bunde cities
mydata_bunde_expl <- mydata_geo %>%
  dplyr::select(PlaceID, PlaceName, Year, Vrtrb) %>%
  filter(PlaceID %in% mydata_bunde$PlaceID) %>%
  filter(Vrtrb == 1)
nrow(mydata_bunde_expl) # 51 overall
mydata_bunde_expl %>% filter(Year>1384) %>% nrow # 47

# how many expulsions per bund?
table_expl_bunde <- mydata_bunde %>% 
  dplyr::select(PlaceID, PlaceName, Städtebund) %>%
  distinct %>%
  merge(mydata_bunde_expl, by = c("PlaceID", "PlaceName")) %>%
  filter(Year>1350) %>%
  group_by(Städtebund) %>%
  summarize(Expl_total = sum(Vrtrb), Expl_first = min(Year), Expl_last = max(Year))

table_expl_bunde_all <- mydata_bunde %>% 
  dplyr::select(PlaceID, PlaceName, Städtebund) %>%
  distinct %>%
  merge(mydata_bunde_expl, by = c("PlaceID", "PlaceName")) %>%
  filter(Year>1350) %>%
  dplyr::select(Städtebund, PlaceName, Year, Vrtrb) %>%
  arrange(Städtebund, Year)
# a few are cities that were members of two Bünde, and therefore are double-counted in the summary by Bund

# did the expulsions happen before, during, or after Bund activity?
table_bunde <- merge(table_bunde, table_expl_bunde, by = "Städtebund")
# Elsässische: much later (60 yrs)
# Rheinische: later, but not as much (20 yrs)
# Sächsische: during and after
# Schwäbische: later (40 years)

# do a model with just the bunde cities?
# use (expulsions among future allies in 10 yrs prior to treaty) and (expulsions among allies in 10/20 years after treaty)?
# Check first: calculate expulsions by Bund in the preceeding 10 and subsequent 10/20 years to a specific agreement
mydata_bunde %>% merge(mydata_bunde_expl, by=c("PlaceID", "PlaceName")) %>%
  filter((Year.x-Year.y<=10)&(Year.x-Year.y>=-20)) %>%
  mutate(Expl_pre10 = ifelse((Year.x-Year.y>0)&(Year.x-Year.y<=10),1,0),
         Expl_post20 = ifelse((Year.x-Year.y<0)&(Year.x-Year.y>=-20),1,0)) %>%
  group_by(Year.x, Städtebund) %>%
  summarize(Expl_pre10 = sum(Expl_pre10),
            Expl_post20 = sum(Expl_post20))
# it seems the Säschische Bund is the only one where expulsions happened in the 10 years preceeding or 20 years subsequent to an expulsion. The 1430 treaty was specifically an alliance against the Hussite threat, according to Distler

mydata_bunde %>% merge(mydata_bunde_expl, by=c("PlaceID", "PlaceName")) %>%
  filter((Year.x-Year.y<=10)&(Year.x-Year.y>=-20)) %>%
  mutate(Expl_pre10 = ifelse((Year.x-Year.y>0)&(Year.x-Year.y<=10),1,0),
         Expl_post20 = ifelse((Year.x-Year.y<0)&(Year.x-Year.y>=-20),1,0)) %>%
  dplyr::select(Year.y, PlaceName, Städtebund, Year.y) %>%
  distinct %>%
  arrange(Year.y)
# and really, we're just talking about five expulsions being -10/+20 years of a treaty for the Sächsische Bund

# calculate how many expulsions within the Bund in the prior 10 years
table_expl_bunde_ann <- table_expl_bunde_all %>%
  group_by(Städtebund) %>%
  complete(nesting(Städtebund), Year = seq(1385, 1520, 1L)) %>%
  ungroup %>%
  group_by(Städtebund, Year) %>%
  summarize(Expl_ann = sum(Vrtrb)) %>%
  ungroup %>%
  mutate(Expl_ann = replace_na(Expl_ann, 0)) %>%
  group_by(Städtebund) %>%
  arrange(Year) %>%
  mutate(Expl_pre10 = rollapplyr(Expl_ann, width=list(-1:-10), sum, # 10 year
                           na.rm=TRUE, fill=0, align="right", partial=TRUE),
         Expl_total = cumsum(Expl_ann)) %>%
  ungroup() %>%
  arrange(Städtebund, Year)

table_expl_bunde_sums <- table_expl_bunde_ann %>% filter(Expl_ann>0)

# calculate how many parties in a Bund did/not have Jews? But Bund size/membership changes over time, making comparisons difficult

```
  
### Descriptive statistics
  
```{r tables_setup, eval=TRUE, echo=FALSE}
table_1_vars <- c("Vrtrb", "Year",
                "Expl_sum10",
                "postConstance", "dist_Constance",
                "const10_FreeImp","const10_Princely","const10_Episc",
                "Expl_sum10_FreeImp","Expl_sum10_Princely", "Expl_sum10_Episc",
                "dist10_rivroad",
                "BundTotPrev",
                "RouteCount", "dist2snap_roads", "onroad")

table_2_vars <- c("ArchbishopBin", "BishopBin", "AuthEpisc","free", "imperial", "AuthFreeImp", "PrinceBin", 
                "AuthTotal",
                "Schoeffen",
                "J_dev_sum",
                "VfgTotal","VrtTotPrev",
                "Mkt2", "Frgn","Mint")

table_b2_5_vars <- c("Expl_sum5", 
                   "dist5_rivroad",
                   "Expl_sum5_Episc", "Expl_sum5_FreeImp",
                   "const5_Episc", "const5_FreeImp")
table_b2_20_vars <- c("Expl_sum20", 
                   "dist20_rivroad",
                   "Expl_sum20_FreeImp", "Expl_sum20_Episc",
                   "const20_FreeImp","const20_Episc")

```
```{r descriptives_setup, eval=TRUE, echo=FALSE}
# all possible variables...
vars_list <- c("Vrtrb",
              "rugged","dist2snap_roads","onroad","RouteCount","RouteLog2",
              "Expl_sum5","Expl_sum10","Expl_sum20",
              # "dist1_euc","dist5_euc","dist10_euc","dist20_euc",
              "dist1_rivroad","dist5_rivroad","dist10_rivroad","dist20_rivroad",
              "Expl_lag1_FreeImp","Expl_sum5_FreeImp","Expl_sum10_FreeImp","Expl_sum20_FreeImp",
                "Expl_lag1_Prince","Expl_sum5_Princely","Expl_sum10_Princely","Expl_sum20_Princely",
                "Expl_lag1_Episc","Expl_sum5_Episc","Expl_sum10_Episc","Expl_sum20_Episc",
              "const1_FreeImp","const5_FreeImp","const10_FreeImp","const20_FreeImp",
                "const1_Princely","const5_Princely","const10_Princely","const20_Princely",
                "const1_Episc","const5_Episc","const10_Episc","const20_Episc",
              "King", "Prince", "Lord", "minor", "imperial", "free", "city",
                "RelFund", "RelHouse", "Archbishop", "Bishop",
                "KingBin", "PrinceBin", "LordBin", "minorBin", "cityBin",
                "RelFundBin", "RelHouseBin", "ArchbishopBin", "BishopBin",
                "AuthMajor","AuthRel","AuthNob","AuthRelOnly","AuthNobOnly", "AuthFreeImp", "AuthEpisc",
              "AuthTotal","AuthShare",
              "DomChg","DomTenure","DomChgTotal","DomChg_sum5","DomChg_sum10",
              "DomRts","DomSld","DomMtg","DomFf","DomCqr","DomFam","DomOth",
              "StRt","Residence","Amt","Castellan","Rat","Mayor","Schoeffen","Schlthss","HighJust",
              "BundTotPrev","Bund_sum5", "Bund_sum10",
              "Diocese",
              "Gmnde","Ldrs","J_dev_bin", "J_dev_sum",
              "Vfg","VfgTotal","Vfg_sum5","VrtTotPrev",
              "prc_lag1", "temp_lag1",
              "Mkt2", "Frgn","Transit","Tolls","Trade","Mint",
              "Year")

descr_all <- sapply(model_data_all, function(x) rbind(count = length(which(!is.na(x))), 
                                  minimum = min(as.numeric(x), na.rm=TRUE),
                                  maximum = max(as.numeric(x), na.rm=TRUE),
                                  mean = mean(as.numeric(x), na.rm=TRUE),
                                  median = median(as.numeric(x), na.rm=TRUE),
                                  stdev = sd(as.numeric(x), na.rm=TRUE)))
rownames(descr_all) <- c("n","Min","Max","Mean","Median","St.Dev")
colnames(descr_all) <- dimnames(descr_all)[[2]]

descr_post <- sapply(model_data, function(x) rbind(count = length(which(!is.na(x))), 
                                  minimum = min(as.numeric(x), na.rm=TRUE),
                                  maximum = max(as.numeric(x), na.rm=TRUE),
                                  mean = mean(as.numeric(x), na.rm=TRUE),
                                  median = median(as.numeric(x), na.rm=TRUE),
                                  stdev = sd(as.numeric(x), na.rm=TRUE)))
rownames(descr_post) <- c("n","Min","Max","Mean","Median","St.Dev")
colnames(descr_post) <- dimnames(descr_post)[[2]]

descr_scaled <- sapply(model_data_scaled, function(x) rbind(count = length(which(!is.na(x))), 
                                  minimum = min(as.numeric(x), na.rm=TRUE),
                                  maximum = max(as.numeric(x), na.rm=TRUE),
                                  mean = mean(as.numeric(x), na.rm=TRUE),
                                  median = median(as.numeric(x), na.rm=TRUE),
                                  stdev = sd(as.numeric(x), na.rm=TRUE)))
rownames(descr_scaled) <- c("n","Min","Max","Mean","Median","St.Dev")
colnames(descr_scaled) <- dimnames(descr_scaled)[[2]]


descr_20 <- sapply(model_data_20, function(x) rbind(count = length(which(!is.na(x))), 
                                  minimum = min(as.numeric(x), na.rm=TRUE),
                                  maximum = max(as.numeric(x), na.rm=TRUE),
                                  mean = mean(as.numeric(x), na.rm=TRUE),
                                  median = median(as.numeric(x), na.rm=TRUE),
                                  stdev = sd(as.numeric(x), na.rm=TRUE)))
rownames(descr_20) <- c("n","Min","Max","Mean","Median","St.Dev")
colnames(descr_20) <- dimnames(descr_20)[[2]]

```
   
 Table 1. Table of descriptives for unscaled data, diffusion variables 
   
```{r table_1_descriptives_diffusion, eval=TRUE, results='asis', echo=FALSE}
stargazer(t(descr_post[,table_1_vars]), digits=2, type = "html", summary = FALSE, align=TRUE, out=here::here("Diffusion","out_tables","table_1_descriptives_diffusion.htm"))
```
  
 Table 2. Table of descriptives for unscaled data, adoption variables
 
```{r table_2_descriptives_adoption, eval=TRUE, results='asis', echo=FALSE}
stargazer(t(descr_post[,table_2_vars]), digits=2, type = "html", summary = FALSE, align=TRUE, out=here::here("Diffusion","out_tables","table_2_descriptives_adoption.htm"))
```
  

```{r contingency_tables, eval=TRUE, echo=FALSE}

# Dominion
table(model_data$AuthFreeImp[model_data$Vrtrb==1],model_data$AuthTotal[model_data$Vrtrb==1]) # 33 expulsions in free/imp-only
table(model_data$PrinceBin[model_data$Vrtrb==1],model_data$AuthTotal[model_data$Vrtrb==1]) # 24 in prince-only; 16 in prince+
table(model_data$AuthEpisc[model_data$Vrtrb==1],model_data$AuthTotal[model_data$Vrtrb==1]) # 21 in episcopal-only; 7 in episcopal+
table(model_data$KingBin[model_data$Vrtrb==1],model_data$AuthTotal[model_data$Vrtrb==1]) # 2 in King-only; 3 in King+
table(model_data$LordBin[model_data$Vrtrb==1],model_data$AuthTotal[model_data$Vrtrb==1]) # 1 in Lord-only
table(model_data$minorBin[model_data$Vrtrb==1],model_data$AuthTotal[model_data$Vrtrb==1]) # 3 in minor+
table(model_data$cityBin[model_data$Vrtrb==1],model_data$AuthTotal[model_data$Vrtrb==1]) # 1 in city-only; 2 in city+

table(model_data$Residence, model_data$Vrtrb) # 13 in residence cities

# Religion
table(model_data$postConstance,model_data$Vrtrb) # 98 post-Constance
table(model_data$Diocese,model_data$Vrtrb) # 22 in diocesan seats
table(model_data$AuthEpisc[model_data$Vrtrb==1],model_data$Diocese[model_data$Vrtrb==1]) # 18 in seats under episcopates; 10 in non-seats under others; 4 in seats under others
```

Overall, 7 expulsions were in cities ruled by other than free/imperial, princely, or the Church.

Now visualize the distributions of the various diffusion measures, comparing observations with versus without expulsions. They are not all continuous, so histograms would seem to make sense, but the raw volume of non-expulsions makes histograms useless. Only the non-expulsions are visible. Instead, use density plots!

```{r density_plots, eval=TRUE, echo=FALSE}

# Do small multiples here. They do not have similar scales, so they need to be created separately and then stitched together with cowplot.

# spatial
p.dens01 <- ggplot(model_data, aes(x = dist_Constance, fill = as.factor(Vrtrb))) +
   geom_density(alpha = 0.2) +
  scale_fill_manual(values=c("gray","black"), , 
                    name = "Observations _________ expulsion",
      labels = c("without", "with")) + 
  labs(title="km to Constance", y="", x="") + mytheme

# temporal
p.dens21 <- ggplot(model_data, aes(x = Expl_sum10, fill = as.factor(Vrtrb))) +
   geom_density(alpha = 0.2) +
  scale_fill_manual(values=c("gray","black"), , 
                    name = "Observations _________ expulsion",
      labels = c("without", "with")) + 
  labs(title="Dif1: any others", y="", x="") + mytheme 
p.dens22 <- ggplot(model_data, aes(x = Expl_sum10_Episc, fill = as.factor(Vrtrb))) +
   geom_density(alpha = 0.2) +
  scale_fill_manual(values=c("gray","black"), , 
                    name = "Observations _________ expulsion",
      labels = c("without", "with")) + 
  labs(title="Dif2: in episcopal", y="", x="") + mytheme 
p.dens23 <- ggplot(model_data, aes(x = Expl_sum10_FreeImp, fill = as.factor(Vrtrb))) +
   geom_density(alpha = 0.2) +
  scale_fill_manual(values=c("gray","black"), , 
                    name = "Observations _________ expulsion",
      labels = c("without", "with")) + 
  labs(title="Dif2: in free/imperial)", y="", x="") + mytheme 
p.dens24 <- ggplot(model_data, aes(x = Expl_sum10_Princely, fill = as.factor(Vrtrb))) +
   geom_density(alpha = 0.2) +
  scale_fill_manual(values=c("gray","black"), , 
                    name = "Observations _________ expulsion",
      labels = c("without", "with")) + 
  labs(title="Dif2: in princely", y="", x="") + mytheme
p.dens25 <- ggplot(model_data, aes(x = Expl_sum10_DomGrp, fill = as.factor(Vrtrb))) +
   geom_density(alpha = 0.2) +
  scale_fill_manual(values=c("gray","black"), , 
                    name = "Observations _________ expulsion",
      labels = c("without", "with")) + 
  labs(title="Dif2: in peers", y="", x="") + mytheme

# distance-moderated
p.dens31 <- ggplot(model_data, aes(x = dist10_rivroad, fill = as.factor(Vrtrb))) +
   geom_density(alpha = 0.2) +
  scale_fill_manual(values=c("gray","black"), , 
                    name = "Observations _________ expulsion",
      labels = c("without", "with")) + 
  labs(title="Dif3: any others", y="", x="") + mytheme
p.dens32 <- ggplot(model_data, aes(x = const10_Episc, fill = as.factor(Vrtrb))) +
   geom_density(alpha = 0.2) +
  scale_fill_manual(values=c("gray","black"), , 
                    name = "Observations _________ expulsion",
      labels = c("without", "with")) + 
  labs(title="Dif3: in episcopal", y="", x="") + mytheme 
p.dens33 <- ggplot(model_data, aes(x = const10_FreeImp, fill = as.factor(Vrtrb))) +
   geom_density(alpha = 0.2) +
  scale_fill_manual(values=c("gray","black"), , 
                    name = "Observations _________ expulsion",
      labels = c("without", "with")) + 
  labs(title="Dif3: in free/imperial", y="", x="") + mytheme 
p.dens34 <- ggplot(model_data, aes(x = const10_Princely, fill = as.factor(Vrtrb))) +
   geom_density(alpha = 0.2) +
  scale_fill_manual(values=c("gray","black"), , 
                    name = "Observations _________ expulsion",
      labels = c("without", "with")) + 
  labs(title="Dif3: in princely", y="", x="") + mytheme 
p.dens35 <- ggplot(model_data, aes(x = const10_DomType, fill = as.factor(Vrtrb))) +
   geom_density(alpha = 0.2) +
  scale_fill_manual(values=c("gray","black"), , 
                    name = "Observations _________ expulsion",
      labels = c("without", "with")) + 
  labs(title="Dif3: in peers", y="", x="") + mytheme

# remember that some of these are essentially zero-inflated!
p.densities_all <- cowplot::plot_grid(p.dens21 + theme(legend.position="none"), p.dens22+ theme(legend.position="none"), p.dens23+ theme(legend.position="none"), p.dens24+ theme(legend.position="none"), p.dens25+ theme(legend.position="none"), p.dens31+ theme(legend.position="none"), p.dens32+ theme(legend.position="none"), p.dens33+ theme(legend.position="none"), p.dens34+theme(legend.position="none"), p.dens35+theme(legend.position="none"), p.dens01+ theme(legend.position="none"),
                  ncol = 4,
                  align = 'hv',
                  labels = c(""),
                  hjust = -1
                  )


legend_b <- get_legend(p.dens01 + 
                         theme(legend.position="bottom", 
                               legend.direction = "horizontal"))

p.compare_densities <- cowplot::plot_grid(p.densities_all, legend_b, 
                                          ncol = 1, rel_heights = c(1,.1))

plot(p.compare_densities)
```

Huh - these actually don't look that different. And the distance-scaled measures are super-skewed. If anything, it seems like there should be positive relationships with the diffusion measures.

***
## Bayesian Regression Analysis

This project imitates discrete time event-history analysis. Unlike in other, more typical survival or hazard models, I am not interested in the time-to-event. I'm looking at the hazard in each discrete time period (year, in this case). Discrete-time event history analysis is equivalent to logistic regression. Due to some assumption violations about the distribution and nature of the data, I am using Bayesian logistic regression, which entails setting priors.  

```{r models_setup, eval = TRUE, echo=FALSE}

# Learning
# Pattern 1: learning/observation short-term spike with broad reach, transit access, and technical communities

x.learning <- as.formula(Vrtrb ~  
  # diffusion measures:
    Expl_sum10 + # universal temporal trend - Dif1
    RouteCount + # information flow
    BundTotPrev +  # history of policy community
  # local factors:
    AuthEpisc + free + imperial + PrinceBin +
    AuthTotal + # total parties with claims to sovereignty
    Schoeffen + #Castellan + Schlthss + # local rule
    VfgTotal + VrtTotPrev + # prior persecutions
    J_dev_sum + # Jewish community
    Mkt2 + Frgn + Mint + # commercial development
  # settlement random effects
  (1|PlaceID/ObsID) + Period15yrs)

# Social construction
# Pattern 2: desirability - neighborhood change

x.nhood <- as.formula(Vrtrb ~  
  # diffusion measures:
    dist10_rivroad + # local spatial trend - Dif2
  # local factors:
    AuthEpisc + free + imperial + PrinceBin + 
    AuthTotal + # total parties with claims to sovereignty
    Schoeffen + #Castellan + Schlthss + # local rule
    VfgTotal + VrtTotPrev + # prior persecutions
    J_dev_sum + # Jewish community
    Mkt2 + Frgn + Mint + # commercial development
  # settlement random effects
  (1|PlaceID/ObsID) + Period15yrs)

# Pattern 3: religion-prompted cultural change
# increase from 1418, spatial pattern close to Constance, under religious rulers

x.relchange <- as.formula(Vrtrb ~  
  # diffusion measures:
    Expl_sum10 + 
    Expl_sum10 * postConstance + # interact post-Constance
    AuthEpisc * postConstance + # episcopal, post-Constance
    dist_Constance + # with spatial decay
  # local factors:
    AuthEpisc + free + imperial + PrinceBin + 
    AuthTotal + # total parties with claims to sovereignty
    Schoeffen + #Castellan + Schlthss + # local rule
    #Diocese + # religious center - very few
    VfgTotal + VrtTotPrev + # prior persecutions
    J_dev_sum + # Jewish community
    Mkt2 + Frgn + Mint + # commercial development
  # settlement random effects
  (1|PlaceID/ObsID) + Period15yrs)
  
# Pattern 4: increase from 1418, led & within religious rulers
# Pattern 5: increase from 1418, led & within autonomy-seeking cities
# Dif3: Expulsions for shared dominion type (10-year)

x.segmented <- as.formula(Vrtrb ~  
  # diffusion measures:
    Expl_sum10_Episc + # Church-led
    Expl_sum10_Episc * AuthEpisc + # within Church
    Expl_sum10_FreeImp + # autonomous-led
    Expl_sum10_FreeImp * AuthFreeImp + # within autonomous
    dist_Constance + # with universal spatial trend
  # local factors:
    AuthEpisc + AuthFreeImp + PrinceBin +
    AuthTotal + # total parties with claims to sovereignty
    Schoeffen + #Castellan + Schlthss + # local rule
    VfgTotal + VrtTotPrev + # prior persecutions
    J_dev_sum + # Jewish community
    Mkt2 + Frgn + Mint + # commercial development
  # settlement random effects
  (1|PlaceID/ObsID) + Period15yrs)

x.segdistance <- as.formula(Vrtrb ~  
  # diffusion measures:
    Expl_sum10_Episc + # Church-led
    Expl_sum10_Episc * AuthEpisc + # within Church
    const10_Episc + # with meaning-based spatial trend - Dif4
    Expl_sum10_FreeImp + # autonomous-led
    Expl_sum10_FreeImp * AuthFreeImp + # within autonomous
    const10_FreeImp + # with meaning-based spatial trend - Dif4
    dist_Constance + # with universal spatial trend
  # local factors:
    AuthFreeImp + AuthEpisc + PrinceBin +
    AuthTotal + # total parties with claims to sovereignty
    Schoeffen + #Castellan + Schlthss + # local rule
    VfgTotal + VrtTotPrev + # prior persecutions
    J_dev_sum + # Jewish community
    Mkt2 + Frgn + Mint + # commercial development
  # settlement random effects
  (1|PlaceID/ObsID) + Period15yrs)

# Pattern 6: increase from 1418 led by autonomous cities plus suppression nearby – temporal, categorical, spatial

x.hybrid <- as.formula(Vrtrb ~  
  # diffusion measures:
    postConstance + # historical change
    dist_Constance + # universal spatial trend
    Expl_sum10_FreeImp + # autonomous-led
    dist10_rivroad + # local spatial trend
  # local factors:
    AuthFreeImp + AuthEpisc + PrinceBin +
    AuthTotal + # total parties with claims to sovereignty
    Schoeffen + #Castellan + Schlthss + # local rule
    VfgTotal + VrtTotPrev + # prior persecutions
    J_dev_sum + # Jewish community
    Mkt2 + Frgn + Mint + # commercial development
  # settlement random effects
  (1|PlaceID/ObsID) + Period15yrs)

```
 
###Calculate results

Use Bayesian GLM in rstanarm from authors/Gelman et al., and set weakly informative priors.

 Silently run the main models. Print the number of divergent transitions in each estimation.
 
```{r models_run, echo = FALSE, eval= FALSE, include = FALSE}

# with default priors, chooses _intercept = normal(0,10) and 
# coefs location = 0, adjusted scales range a lot
# error from Cauchy: log prob evals to log(0), initialization failed after 100 attempts
# seems like I might need to stdize all non-binary vars to 0, sd=0.5 in order to use Gelman's recs for sparse data student_t(3<nu<7, 0, s)
# see https://www.kaggle.com/avehtari/bayesian-logistic-regression-with-rstanarm
# see https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations

t_prior <- student_t(df=7, location=0, scale=2.5)
half_t_prior <- student_t(df=4, location=0, scale=1)

# some options-setting to make things a little faster
rstan::rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

# x.learning, x.relchange, x.nhood, x.segmented, x.segdistance, x.hybrid

fit_learning <- stan_glmer(x.learning,
                              data = model_data_scaled,
                              family = binomial(link = "logit"),
                              prior = t_prior,
                              prior_intercept = half_t_prior,
                              #QR=FALSE, sparse=TRUE,
                              iter=5000, cores=parallel::detectCores(),
                              chains=5, seed=1418,
                              control = list(adapt_delta = 0.99)
                              )
saveRDS(fit_learning, here::here("Diffusion", "out_files", "fit_learning.rds"))
# 40 divergent
fit_learning <- readRDS(here::here("Diffusion", "out_files", "fit_learning.rds"))

fit_relchange <- stan_glmer(x.relchange,
                              data = model_data_scaled,
                              family = binomial(link = "logit"),
                              prior = t_prior,
                              prior_intercept = half_t_prior,
                              #QR=FALSE, sparse=TRUE,
                              iter=5000, cores=parallel::detectCores(),
                              chains=5, seed=1418,
                              control = list(adapt_delta = 0.99)
                              )
saveRDS(fit_relchange, here::here("Diffusion", "out_files", "fit_relchange.rds"))
# 1 divergent - reran! no kfold updates though...
fit_relchange <- readRDS(here::here("Diffusion", "out_files", "fit_relchange.rds"))

fit_nhood <- stan_glmer(x.nhood,
                              data = model_data_scaled,
                              family = binomial(link = "logit"),
                              prior = t_prior,
                              prior_intercept = half_t_prior,
                              #QR=FALSE, sparse=TRUE,
                              iter=5000, cores=parallel::detectCores(),
                              chains=5, seed=1418,
                              control = list(adapt_delta = 0.99)
                              )
saveRDS(fit_nhood, here::here("Diffusion", "out_files", "fit_nhood.rds"))
# 64 divergent
fit_nhood <- readRDS(here::here("Diffusion", "out_files", "fit_nhood.rds"))

fit_segmented <- stan_glmer(x.segmented,
                              data = model_data_scaled,
                              family = binomial(link = "logit"),
                              prior = t_prior,
                              prior_intercept = half_t_prior,
                              #QR=FALSE, sparse=TRUE,
                              iter=5000, cores=parallel::detectCores(),
                              chains=5, seed=1418,
                              control = list(adapt_delta = 0.99)
                              )
saveRDS(fit_segmented, here::here("Diffusion", "out_files", "fit_segmented.rds"))
# 4 divergent
fit_segmented <- readRDS(here::here("Diffusion", "out_files", "fit_segmented.rds"))

fit_segdistance <- stan_glmer(x.segdistance,
                              data = model_data_scaled,
                              family = binomial(link = "logit"),
                              prior = t_prior,
                              prior_intercept = half_t_prior,
                              #QR=FALSE, sparse=TRUE,
                              iter=5000, cores=parallel::detectCores(),
                              chains=5, seed=1418,
                              control = list(adapt_delta = 0.99)
                              )
saveRDS(fit_segdistance, here::here("Diffusion", "out_files", "fit_segdistance.rds"))
# 6 divergent
fit_segdistance <- readRDS(here::here("Diffusion", "out_files", "fit_segdistance.rds"))

fit_hybrid <- stan_glmer(x.hybrid,
                              data = model_data_scaled,
                              family = binomial(link = "logit"),
                              prior = t_prior,
                              prior_intercept = half_t_prior,
                              #QR=FALSE, sparse=TRUE,
                              iter=5000, cores=parallel::detectCores(),
                              chains=5, seed=1418,
                              control = list(adapt_delta = 0.99)
                              )
saveRDS(fit_hybrid, here::here("Diffusion", "out_files", "fit_hybrid.rds"))
# 53 divergent
fit_hybrid <- readRDS(here::here("Diffusion", "out_files", "fit_hybrid.rds"))

```
  
   
### Diagnostics and Assessing Fit

  In March 2020 I ran the models using raw, unscaled data and trying different measurements of years. I wanted to confirm my prior experience that not scaling the continuous data messed with the models. Using unscaled data, the models had many more divergences - they just had a harder time running when the range of the variables was far outside the [0,1] range of a binomial-based distribution. I really do need some control for change over time, and continuous (re-scaled) years is a) not particularly interpretable theoretically and b) not uniquely useful for the estimation. The models I settled on use 15 year periods because I don't want them to be identical to my 10 year lags and because I don't want a period shorter than 10 years that would end up capturing short-term spikes. There was essentially no difference in point estimates or their distributions between 15yr and 9yr period dummies. I continue to prefer 15yr.
  
  
```{r models_diagnostics, eval=FALSE, echo = FALSE, include = FALSE}

sso_learning <- shinystan::as.shinystan(fit_learning, ppd = FALSE)
if (interactive()) launch_shinystan(sso_learning)
sso_nhood <- shinystan::as.shinystan(fit_nhood, ppd = FALSE)
if (interactive()) launch_shinystan(sso_nhood)
sso_relchange <- shinystan::as.shinystan(fit_relchange, ppd = FALSE)
if (interactive()) launch_shinystan(sso_relchange)
sso_segmented <- shinystan::as.shinystan(fit_segmented, ppd = FALSE)
if (interactive()) launch_shinystan(sso_segmented)
sso_segdistance <- shinystan::as.shinystan(fit_segdistance, ppd = FALSE)
if (interactive()) launch_shinystan(sso_segdistance)
sso_hybrid <- shinystan::as.shinystan(fit_hybrid, ppd = FALSE)
if (interactive()) launch_shinystan(sso_hybrid)
sso_peers <- shinystan::as.shinystan(fit_peers, ppd = FALSE)
if (interactive()) launch_shinystan(sso_peers)

```
  
  Using the shinystan app, it generally seems like things are okay. Neff is fine for all variables for all models, and so is R-hat. A few models have higher numbers of divergent transitions, but the proportion is still low, and the divergences do not appear to be clustered within the joint distributional space. Additionally, the distributions of the point estimates are relatively normal.

Use K-fold estimations to detect bias in the results.  
 
```{r cv_checks, eval=TRUE, echo = FALSE}

# Leave-One-Out Validation re-estimates a model as many times as there are problematic observations. If there are few problematic obs and it doesn't take that long to run, that's not really a problem. However, these models take 2-4 hours on high powered computing clusters, and they surely have many problematic obs.
# fit_learning_loo <- loo(fit_learning, cores = parallel::detectCores(),
#   k_threshold = 0.7)
# fit_nhood_loo <- loo(fit_nhood, cores = parallel::detectCores(),
#   k_threshold = 0.7)
# fit_relchange_loo <- loo(fit_relchange, cores = parallel::detectCores(),
#   k_threshold = 0.7)
# fit_segmented_loo <- loo(fit_segmented, cores = parallel::detectCores(),
#   k_threshold = 0.7)
# fit_segdistance_loo <- loo(fit_segdistance, cores = parallel::detectCores(),
#   k_threshold = 0.7)
# fit_hybrid_loo <- loo(fit_hybrid, cores = parallel::detectCores(),
#   k_threshold = 0.7)
# fit_peers_loo <- loo(fit_peers, cores = parallel::detectCores(),
#   k_threshold = 0.7)

# Instead, do k-fold cross-validation. It partitions data into k subsets and reruns k times, each leaving out a subset.
# Stratify by years. Stratifying by city is unrealistic given some have few obs. Years are somewhat balanced in number of observations. There really isn't a more balanced variable for number of obs and for expulsions.
# use "model_data_scaled" because that's what the estimations use
folds_Period <- loo::kfold_split_stratified(K = 10,
                                            x = model_data_scaled$Period15yrs)
# kfold cv requires a lot of memory and can force a shutdown and restart, so scale back how much of the computational power of the computer is being used for this - cores option is already set to 8, but set the local option cores=1 in order to run the k models successively instead of simultaneously

fit_learning_kfold <- kfold(fit_learning, folds = folds_Period, cores = 1)
saveRDS(fit_learning_kfold, here::here("Diffusion", "out_files", "fit_learning_kfold.rds"))
# fit_learning_kfold <- readRDS(here::here("Diffusion", "out_files", "fit_learning_kfold.rds"))

fit_nhood_kfold <- kfold(fit_nhood, folds = folds_Period, cores = 1)
saveRDS(fit_nhood_kfold, here::here("Diffusion", "out_files", "fit_nhood_kfold.rds"))
# fit_nhood_kfold <- readRDS(here::here("Diffusion", "out_files", "fit_nhood_kfold.rds"))

fit_relchange_kfold <- kfold(fit_relchange, folds = folds_Period, cores = 1)
saveRDS(fit_relchange_kfold, here::here("Diffusion", "out_files", "fit_relchange_kfold.rds"))
# fit_relchange_kfold <- readRDS(here::here("Diffusion", "out_files", "fit_relchange_kfold.rds"))

fit_segmented_kfold <- kfold(fit_segmented, folds = folds_Period, cores = 1)
saveRDS(fit_segmented_kfold, here::here("Diffusion", "out_files", "fit_segmented_kfold.rds"))
# fit_segmented_kfold <- readRDS(here::here("Diffusion", "out_files", "fit_segmented_kfold.rds"))

fit_segdistance_kfold <- kfold(fit_segdistance, folds = folds_Period, cores = 1)
saveRDS(fit_segdistance_kfold, here::here("Diffusion", "out_files", "fit_segdistance_kfold.rds"))
# fit_segdistance_kfold <- readRDS(here::here("Diffusion", "out_files", "fit_segdistance_kfold.rds"))

fit_hybrid_kfold <- kfold(fit_hybrid, folds = folds_Period, cores = 1)
saveRDS(fit_hybrid_kfold, here::here("Diffusion", "out_files", "fit_hybrid_kfold.rds"))
# fit_hybrid_kfold <- readRDS(here::here("Diffusion", "out_files", "fit_hybrid_kfold.rds"))

kfolds <- list(fit_learning_kfold, fit_nhood_kfold, fit_relchange_kfold, fit_segmented_kfold, fit_segdistance_kfold, fit_hybrid_kfold)

t(kfold_param_summary(kfolds))

loo_compare(fit_learning_kfold, fit_nhood_kfold)
loo_compare(fit_nhood_kfold, fit_relchange_kfold)
loo_compare(fit_relchange_kfold, fit_segmented_kfold)
loo_compare(fit_segmented_kfold, fit_segdistance_kfold)
loo_compare(fit_segdistance_kfold, fit_hybrid_kfold)
```
  
  
### Main results: Tables

   
```{r models_output, eval=TRUE, echo = FALSE, include = FALSE}
# model output: fit_learning, fit_nhood, fit_relchange, fit_segmented, fit_segdistance, fit_hybrid (, fit_peers)

# posterior_interval(fit_segdistance, prob = 0.89)[c(1:27),]

# Pattern 1: fit_learning

var.order.m1 <- c("Expl_sum10", "RouteCount", "BundTotPrev",
  "AuthEpisc", "free", "imperial", "PrinceBin",
  "AuthTotal", "Schoeffen", 
  "Mkt2", "Frgn", "Mint",
  "VfgTotal", "VrtTotPrev", "J_dev_sum",
  "Period15yrs(1400,1415]", "Period15yrs(1415,1430]", "Period15yrs(1430,1445]",
  "Period15yrs(1445,1460]", "Period15yrs(1460,1475]", "Period15yrs(1475,1490]",
  "Period15yrs(1490,1505]", "Period15yrs(1505,1520]",
  "(Intercept)", "Sigma[PlaceID:(Intercept),(Intercept)]", "mean_PPD", "log-posterior")

label.order.m1 <- c("Dif1", "Count of Travel Routes", "Total prior alliance treaties",
  "Episcopal", "Free", "Imperial", "Prince", 
  "Count of dominion parties", "Schöffen",  
  "Market development", "Foreign moneylenders", "Mint", 
  "Total Previous Persecutions", "Total Previous Expulsions",
  "Jewish community development", 
  "1401-1415", "1416-1430", "1431-1445",
  "1446-1460", "1461-1475", "1476-1490",
  "1491-1505", "1506-1520",
  "Global Intercept", "Sigma", "Mean PPD", "log-posterior")

# Pattern 2: fit_nhood

var.order.m2 <- c("dist10_rivroad",
  "AuthEpisc", "free", "imperial", "PrinceBin", 
  "AuthTotal", "Schoeffen", 
  "Mkt2", "Frgn", "Mint",
  "VfgTotal", "VrtTotPrev", "J_dev_sum",
  "Period15yrs(1400,1415]", "Period15yrs(1415,1430]", "Period15yrs(1430,1445]",
  "Period15yrs(1445,1460]", "Period15yrs(1460,1475]", "Period15yrs(1475,1490]",
  "Period15yrs(1490,1505]", "Period15yrs(1505,1520]",
  "(Intercept)", "Sigma[PlaceID:(Intercept),(Intercept)]", "mean_PPD", "log-posterior")

label.order.m2 <- c("Dif2: Any",
  "Episcopal", "Free", "Imperial", "Prince", 
  "Count of dominion parties", "Schöffen",  
  "Market development", "Foreign moneylenders", "Mint", 
  "Total Previous Persecutions", "Total Previous Expulsions",
  "Jewish community development", 
  "1401-1415", "1416-1430", "1431-1445",
  "1446-1460", "1461-1475", "1476-1490",
  "1491-1505", "1506-1520",
  "Global Intercept", "Sigma", "Mean PPD", "log-posterior")

# Pattern 3: fit_relchange

var.order.m3 <- c("Expl_sum10", "postConstance", "Expl_sum10:postConstance", "postConstance:AuthEpisc", "dist_Constance",
  "AuthEpisc", "free", "imperial", "PrinceBin", 
  "AuthTotal", "Schoeffen", 
  "Mkt2", "Frgn", "Mint",
  "VfgTotal", "VrtTotPrev", "J_dev_sum",
  "Period15yrs(1400,1415]", "Period15yrs(1415,1430]", "Period15yrs(1430,1445]",
  "Period15yrs(1445,1460]", "Period15yrs(1460,1475]", "Period15yrs(1475,1490]",
  "Period15yrs(1490,1505]", "Period15yrs(1505,1520]",
  "(Intercept)", "Sigma[PlaceID:(Intercept),(Intercept)]", "mean_PPD", "log-posterior")

label.order.m3 <- c("Dif1", "After Constance", "Expl_sum10*postConstance", "Episcopal*postConstance", "Km to Constance",
  "Episcopal", "Free", "Imperial", "Prince", 
  "Count of dominion parties", "Schöffen",  
  "Market development", "Foreign moneylenders", "Mint", 
  "Total Previous Persecutions", "Total Previous Expulsions",
  "Jewish community development", 
  "1401-1415", "1416-1430", "1431-1445",
  "1446-1460", "1461-1475", "1476-1490",
  "1491-1505", "1506-1520",
  "Global Intercept", "Sigma", "Mean PPD", "log-posterior")


# Pattern 4: fit_segmented

var.order.m4 <- c("dist_Constance", "Expl_sum10_Episc", "Expl_sum10_FreeImp", "Expl_sum10_Episc:AuthEpisc", "Expl_sum10_FreeImp:AuthFreeImp",
  "AuthEpisc", "AuthFreeImp", "PrinceBin", 
  "AuthTotal", "Schoeffen", 
  "Mkt2", "Frgn", "Mint",
  "VfgTotal", "VrtTotPrev", "J_dev_sum",
  "Period15yrs(1400,1415]", "Period15yrs(1415,1430]", "Period15yrs(1430,1445]",
  "Period15yrs(1445,1460]", "Period15yrs(1460,1475]", "Period15yrs(1475,1490]",
  "Period15yrs(1490,1505]", "Period15yrs(1505,1520]",
  "(Intercept)", "Sigma[PlaceID:(Intercept),(Intercept)]", "mean_PPD", "log-posterior")

label.order.m4 <- c("Km to Constance", "Dif3: Episcopal", "Dif3: Free/Imperial", "Dif3:Episc*Episc", "Dif3:FreeImp*FreeImp", 
  "Episcopal", "Free/Imperial", "Prince",
  "Count of dominion parties", "Schöffen",  
  "Market development", "Foreign moneylenders", "Mint", 
  "Total Previous Persecutions", "Total Previous Expulsions",
  "Jewish community development", 
  "1401-1415", "1416-1430", "1431-1445",
  "1446-1460", "1461-1475", "1476-1490",
  "1491-1505", "1506-1520",
  "Global Intercept", "Sigma", "Mean PPD", "log-posterior")

# Pattern 5: fit_segdistance
var.order.m5 <- c("dist_Constance", "Expl_sum10_Episc", "Expl_sum10_FreeImp", 
  "Expl_sum10_Episc:AuthEpisc", "Expl_sum10_FreeImp:AuthFreeImp", 
  "const10_Episc", "const10_FreeImp", 
  "AuthEpisc","AuthFreeImp", "PrinceBin", 
  "AuthTotal", "Schoeffen", 
  "Mkt2", "Frgn", "Mint",
  "VfgTotal", "VrtTotPrev", "J_dev_sum",
  "Period15yrs(1400,1415]", "Period15yrs(1415,1430]", "Period15yrs(1430,1445]",
  "Period15yrs(1445,1460]", "Period15yrs(1460,1475]", "Period15yrs(1475,1490]",
  "Period15yrs(1490,1505]", "Period15yrs(1505,1520]",
  "(Intercept)", "Sigma[PlaceID:(Intercept),(Intercept)]", "mean_PPD", "log-posterior")

label.order.m5 <- c("Km to Constance", "Dif3: Episcopal", "Dif3: Free/Imperial", 
  "Dif3:Episc*AuthEpisc", "Dif3:FreeImp*AuthFreeImp", 
  "Dif4: Episcopal", "Dif4: Free/Imperial", 
  "Episcopal", "Free/Imperial", "Prince",
  "Count of dominion parties", "Schöffen",  
  "Market development", "Foreign moneylenders", "Mint", 
  "Total Previous Persecutions", "Total Previous Expulsions",
  "Jewish community development", 
  "1401-1415", "1416-1430", "1431-1445",
  "1446-1460", "1461-1475", "1476-1490",
  "1491-1505", "1506-1520",
  "Global Intercept", "Sigma", "Mean PPD", "log-posterior")

# Pattern 6: fit_hybrid

var.order.m6 <- c("postConstance", "dist_Constance",
  "dist10_rivroad", "Expl_sum10_FreeImp", 
  "AuthEpisc","AuthFreeImp", "PrinceBin", 
  "AuthTotal", "Schoeffen", 
  "Mkt2", "Frgn", "Mint",
  "VfgTotal", "VrtTotPrev", "J_dev_sum",
  "Period15yrs(1400,1415]", "Period15yrs(1415,1430]", "Period15yrs(1430,1445]",
  "Period15yrs(1445,1460]", "Period15yrs(1460,1475]", "Period15yrs(1475,1490]",
  "Period15yrs(1490,1505]", "Period15yrs(1505,1520]",
  "(Intercept)", "Sigma[PlaceID:(Intercept),(Intercept)]", "mean_PPD", "log-posterior")

label.order.m6 <- c("After Constance", "Km to Constance",
  "Dif2: Any", "Dif3: Free/Imperial",  
  "Episcopal", "Free/Imperial", "Prince",
  "Count of dominion parties", "Schöffen",  
  "Market development", "Foreign moneylenders", "Mint", 
  "Total Previous Persecutions", "Total Previous Expulsions",
  "Jewish community development", 
  "1401-1415", "1416-1430", "1431-1445",
  "1446-1460", "1461-1475", "1476-1490",
  "1491-1505", "1506-1520",
  "Global Intercept", "Sigma", "Mean PPD", "log-posterior")

# model output: fit_nhood, fit_learning, fit_relchange, fit_segmented, fit_segdistance, fit_hybrid (, fit_peers)

m1_table_raw <- tidy.stansummary(fit_learning, cred.levels = c(0.055, 0.5, 0.945), digs = 2)
m2_table_raw <- tidy.stansummary(fit_nhood, cred.levels = c(0.055, 0.5, 0.945), digs = 2)
m3_table_raw <- tidy.stansummary(fit_relchange, cred.levels = c(0.055, 0.5, 0.945), digs = 2)
m4_table_raw <- tidy.stansummary(fit_segmented, cred.levels = c(0.055, 0.5, 0.945), digs = 2)
m5_table_raw <- tidy.stansummary(fit_segdistance, cred.levels = c(0.055, 0.5, 0.945), digs = 2)
m6_table_raw <- tidy.stansummary(fit_hybrid, cred.levels = c(0.055, 0.5, 0.945), digs = 2)

m1_table <- m1_table_raw %>%
  dplyr::filter(terms %in% var.order.m1) %>%
  slice(match(var.order.m1, terms)) %>%
  mutate(terms=label.order.m1)
# m1_table <- rbind(m1_table, c("Nested City:Period Random effects", "Y",c(rep("",ncol(m1_table)-2))))

m2_table <- m2_table_raw %>%
  dplyr::filter(terms %in% var.order.m2) %>%
  slice(match(var.order.m2, terms)) %>%
  mutate(terms=label.order.m2)
# m2_table <- rbind(m2_table, c("Nested City:Period Random effects", "Y",c(rep("",ncol(m2_table)-2))))

m3_table <- m3_table_raw %>%
  dplyr::filter(terms %in% var.order.m3) %>%
  slice(match(var.order.m3, terms)) %>%
  mutate(terms=label.order.m3)
# m3_table <- rbind(m3_table, c("Nested City:Period Random effects", "Y",c(rep("",ncol(m3_table)-2))))

m4_table <- m4_table_raw %>%
  dplyr::filter(terms %in% var.order.m4) %>%
  slice(match(var.order.m4, terms)) %>%
  mutate(terms=label.order.m4)
# m4_table <- rbind(m4_table, c("Nested City:Period Random effects", "Y",c(rep("",ncol(m4_table)-2))))

m5_table <- m5_table_raw %>%
  dplyr::filter(terms %in% var.order.m5) %>%
  slice(match(var.order.m5, terms)) %>%
  mutate(terms=label.order.m5)
# m5_table <- rbind(m5_table, c("Nested City:Period Random effects", "Y",c(rep("",ncol(m5_table)-2))))

m6_table <- m6_table_raw %>%
  dplyr::filter(terms %in% var.order.m6) %>%
  slice(match(var.order.m6, terms)) %>%
  mutate(terms=label.order.m6)
# m6_table <- rbind(m6_table, c("Nested City:Period Random effects", "Y",c(rep("",ncol(m6_table)-2))))

write_csv(m1_table, here::here("Diffusion","out_tables","table_3_model1.csv"))
write_csv(m2_table, here::here("Diffusion","out_tables","table_3_model2.csv"))
write_csv(m3_table, here::here("Diffusion","out_tables","table_3_model3.csv"))
write_csv(m4_table, here::here("Diffusion","out_tables","table_3_model4.csv"))
write_csv(m5_table, here::here("Diffusion","out_tables","table_3_model5.csv"))
write_csv(m6_table, here::here("Diffusion","out_tables","table_3_model6.csv"))

```
  
Table of results for the each model  
1.    
```{r table_3_model1, eval=TRUE, results='asis', echo=FALSE}
stargazer(m1_table, type = "html", summary = FALSE, align=TRUE)
```
  
2.
```{r table_3_model2, eval=TRUE, results='asis', echo=FALSE}
stargazer(m2_table, type = "html", summary = FALSE, align=TRUE)
```
  
3.
```{r table_3_model3, eval=TRUE, results='asis', echo=FALSE}
stargazer(m3_table, type = "html", summary = FALSE, align=TRUE)
```
  
4.
```{r table_3_model4, eval=TRUE, results='asis', echo=FALSE}
stargazer(m4_table, type = "html", summary = FALSE, align=TRUE)
```
  
5.  
```{r table_3_model5, eval=TRUE, results='asis', echo=FALSE}
stargazer(m5_table, type = "html", summary = FALSE, align=TRUE)
```
  
6. 
```{r table_3_model6, eval=TRUE, results='asis', echo=FALSE}
stargazer(m6_table, type = "html", summary = FALSE, align=TRUE)
```


### Main results: Figures

Visualize in interpretable, meaningful ways by presenting predicted probabilities, first differences, or relative risks for different counterfactuals.  

```{r Figs4_areas_plots, eval = TRUE, echo = FALSE, fig.height = 10, fig.width = 15}

fig.var.order.m1 <- c("Expl_sum10", "RouteCount", "BundTotPrev")
fig.label.order.m1 <- c("Dif 1: Time lag", "Travel routes", "Alliances")

fig.var.order.m2 <- c("dist10_rivroad")
fig.label.order.m2 <- c("Dif 2: Time-distance lag")

fig.var.order.m3 <- c("Expl_sum10", "postConstance", "dist_Constance")
fig.label.order.m3 <- c("Dif 1: Time lag", "After Constance", "Km to Constance")

fig.var.order.m4 <- c("Expl_sum10_Episc", "Expl_sum10_FreeImp", "dist_Constance")
fig.label.order.m4 <- c("Dif 3: Time, Episcopal", "Dif 3: Time, Free/Imperial", "Km to Constance")

fig.var.order.m5 <- c("Expl_sum10_Episc", "const10_Episc", "Expl_sum10_FreeImp", "const10_FreeImp", "dist_Constance")
fig.label.order.m5 <- c("Dif 3: Time, Episcopal", "Dif 4: Time-dist, Episcopal", 
  "Dif 3: Time, Free/Imperial", "Dif 4: Time-dist, Free/Imperial", "Km to Constance")

fig.var.order.m6 <- c("dist10_rivroad", "Expl_sum10_FreeImp", "postConstance", "dist_Constance")
fig.label.order.m6 <- c("Dif 2: Time-distance lag", "Dif 3: Time, Free/Imperial", "After Constance", "Km to Constance")

# plot posterior medians with credibility intervals
# binary: shows moving 0-> 1
# scaled: shows moving mean->mean+1/.5 (=mean+2)
# 2+mean(model_data$dist10_rivroad) #2.47
# 2+mean(model_data$Expl_sum10) #5.7
# 2+mean(model_data$dist2snap_roads) #10.99
color_scheme_set("gray")
# THIS WILL TAKE A WHILE!! TRY TO AVOID RECOMPILING THIS RMD AND RE-RUNNING - CAN COPY TO THE MAIN SCRIPT
# model output: fit_learning, fit_nhood, fit_relchange, fit_segmented, fit_segdistance, fit_hybrid (, fit_peers)
posterior_m1 <- as.array(fit_learning)
posterior_m2 <- as.array(fit_nhood)
posterior_m3 <- as.array(fit_relchange)
posterior_m4 <- as.array(fit_segmented)
posterior_m5 <- as.array(fit_segdistance)
posterior_m6 <- as.array(fit_hybrid)

p1 <- mcmc_areas(posterior_m1, pars = rev(fig.var.order.m1),
                 prob = 0.5, prob_outer = 0.89, # 50% and 89% intervals
                 point_est = "median",
                 transformations="plogis") + # inverse logit: probability
  vline_at(0.5, linetype = 3, size = 0.25) +
  scale_y_discrete() +
  scale_y_discrete(labels=rev(fig.label.order.m1)) +
  scale_x_continuous() +
  coord_cartesian(xlim = c(0, 0.97)) +
  scale_x_continuous(breaks = c(.125,.3125,.5,.6875,.875),
                     labels=c(round(0.25*mean(model_data$Vrtrb),4), # 1/8
                              round(0.75*mean(model_data$Vrtrb),4), # 5/8
                              round(mean(model_data$Vrtrb),4),
                              round(1.375*mean(model_data$Vrtrb),4), # 11/8
                              round(1.75*mean(model_data$Vrtrb),4))) + # 14/8
  labs(title = "Model 1") + mytheme

p2 <- mcmc_areas(posterior_m2, pars = rev(fig.var.order.m2),
                 prob = 0.5, prob_outer = 0.89,
                 point_est = "median", transformations="plogis") +
  vline_at(0.5, linetype = 3, size = 0.25) +
  scale_y_discrete() +
  scale_y_discrete(labels=rev(fig.label.order.m2)) +
  scale_x_continuous() +
  coord_cartesian(xlim = c(0, 0.97)) +
  scale_x_continuous(breaks = c(.125,.3125,.5,.6875,.875),
                     labels=c(round(0.25*mean(model_data$Vrtrb),4),
                              round(0.75*mean(model_data$Vrtrb),4),
                              round(mean(model_data$Vrtrb),4),
                              round(1.375*mean(model_data$Vrtrb),4),
                              round(1.75*mean(model_data$Vrtrb),4))) +
  labs(title = "Model 2") + mytheme

p3 <- mcmc_areas(posterior_m3, pars = rev(fig.var.order.m3),
                 prob = 0.5, prob_outer = 0.89,
                 point_est = "median", transformations="plogis") +
  vline_at(0.5, linetype = 3, size = 0.25) +
  scale_y_discrete() +
  scale_y_discrete(labels=rev(fig.label.order.m3)) +
  scale_x_continuous() +
  coord_cartesian(xlim = c(0, 0.97)) +
  scale_x_continuous(breaks = c(.125,.3125,.5,.6875,.875),
                     labels=c(round(0.25*mean(model_data$Vrtrb),4),
                              round(0.75*mean(model_data$Vrtrb),4),
                              round(mean(model_data$Vrtrb),4),
                              round(1.375*mean(model_data$Vrtrb),4),
                              round(1.75*mean(model_data$Vrtrb),4))) +
  labs(title = "Model 3") + mytheme

p4 <- mcmc_areas(posterior_m4, pars = rev(fig.var.order.m4),
                 prob = 0.5, prob_outer = 0.89,
                 point_est = "median", transformations="plogis") +
  vline_at(0.5, linetype = 3, size = 0.25) +
  scale_y_discrete() +
  scale_y_discrete(labels=rev(fig.label.order.m4)) +
  scale_x_continuous() +
  coord_cartesian(xlim = c(0, 0.97)) +
  scale_x_continuous(breaks = c(.125,.3125,.5,.6875,.875),
                     labels=c(round(0.25*mean(model_data$Vrtrb),4),
                              round(0.75*mean(model_data$Vrtrb),4),
                              round(mean(model_data$Vrtrb),4),
                              round(1.375*mean(model_data$Vrtrb),4),
                              round(1.75*mean(model_data$Vrtrb),4))) +
  labs(title = "Model 4") + mytheme

p5 <- mcmc_areas(posterior_m5, pars = rev(fig.var.order.m5),
                 prob = 0.5, prob_outer = 0.89,
                 point_est = "median", transformations="plogis") +
  vline_at(0.5, linetype = 3, size = 0.25) +
  scale_y_discrete() +
  scale_y_discrete(labels=rev(fig.label.order.m5)) +
  scale_x_continuous() +
  coord_cartesian(xlim = c(0, 0.97)) +
  scale_x_continuous(breaks = c(.125,.3125,.5,.6875,.875),
                     labels=c(round(0.25*mean(model_data$Vrtrb),4),
                              round(0.75*mean(model_data$Vrtrb),4),
                              round(mean(model_data$Vrtrb),4),
                              round(1.375*mean(model_data$Vrtrb),4),
                              round(1.75*mean(model_data$Vrtrb),4))) +
  labs(title = "Model 5") + mytheme

p6 <- mcmc_areas(posterior_m6, pars = rev(fig.var.order.m6),
                 prob = 0.5, prob_outer = 0.89,
                 point_est = "median", transformations="plogis") +
  vline_at(0.5, linetype = 3, size = 0.25) +
  scale_y_discrete() +
  scale_y_discrete(labels=rev(fig.label.order.m6)) +
  scale_x_continuous() +
  coord_cartesian(xlim = c(0, 0.97)) +
  scale_x_continuous(breaks = c(.125,.3125,.5,.6875,.875),
                     labels=c(round(0.25*mean(model_data$Vrtrb),4),
                              round(0.75*mean(model_data$Vrtrb),4),
                              round(mean(model_data$Vrtrb),4),
                              round(1.375*mean(model_data$Vrtrb),4),
                              round(1.75*mean(model_data$Vrtrb),4))) +
  labs(title = "Model 6") + mytheme

p1<-p1+mytheme
p2<-p2+mytheme
p3<-p3+mytheme
p4<-p4+mytheme
p5<-p5+mytheme
p6<-p6+mytheme
p.compare_posteriors <- cowplot::plot_grid(p1, p2, p3, p4, p5, p6,
                  ncol = 2, align = 'hv', 
                  labels = c(""),
                  hjust = -1)

# tiff(here::here("Diffusion", "out_figures", "Figs4_areas_plots-1.tiff"), width = 15, height = 10, units = 'in', res = 300)
plot(p.compare_posteriors)
# dev.off()

```
  

also useful: a plot of all posterior distributions  

```{r prediction_plots_areas_m4, eval = FALSE, echo = FALSE, warnings=FALSE, fig.height = 8.5, fig.width = 8}

# plot posterior means with distribution densities
# p.allareas.m5 <- mcmc_areas(
#   posterior_m5, 
#   pars = c(var.order.m5),
#   prob = 0.5, prob_outer = 0.89, # 50+89% intervals
#   point_est = "median")
# plot(p.allareas.m5)

# use counterfactual conditions as new data
#ytilde <- posterior_predict(fit_all_partpool, cfact, draws = 1000)

# calculate first differences or relative risks

```
```{r prediction_plots_areas_m6, eval = FALSE, echo = FALSE, warnings=FALSE, fig.height = 8.5, fig.width = 8}

# plot posterior means with distribution densities
# p.allareas.m6 <- mcmc_areas(
#   posterior_m6, 
#   pars = c(var.order.m6),
#   prob = 0.5, prob_outer = 0.89, # 50+89% intervals
#   point_est = "median")
# plot(p.allareas.m6)
```

## Geospatial Appendix

```{r autocorrelation_appendix, eval=FALSE, echo=FALSE}
rmarkdown::render(here::here("Diffusion","Diffusion_Autocorrelation_22-04-07.Rmd"))
```

  See the separate, RMarkdown-generated report on spatial autocorrelation!

## Regression Analysis Appendix
 
### Re-run analyses at 5-year and 20-year spans

Descriptives for dif time lengths
```{r table_b2_yearlag_descriptives, eval=TRUE, results='asis', echo=FALSE}
stargazer(rbind(t(descr_post[,table_b2_5_vars]), t(descr_20[,table_b2_20_vars])),
          digits=2, type = "html", summary = FALSE, align=TRUE)
```

```{r models_setup_yearlag, eval= TRUE, echo=FALSE}

rbind(t(descr_post[,table_b2_5_vars]), t(descr_20[,table_b2_20_vars])) %>% round(2) %>% write.csv( here::here("Diffusion","out_tables","table_b2_yearlag_descriptives.csv"))

# 5 year ---
x.segdistance5 <- as.formula(Vrtrb ~  
  # diffusion measures:
    Expl_sum5_Episc + # Church-led
    Expl_sum5_Episc * AuthEpisc + # within Church
    const5_Episc + # with meaning-based spatial trend - Dif4
    Expl_sum5_FreeImp + # autonomous-led
    Expl_sum5_FreeImp * AuthFreeImp + # within autonomous
    const5_FreeImp + # with meaning-based spatial trend - Dif4
    dist_Constance + # with universal spatial trend
  # local factors:
    AuthFreeImp + AuthEpisc + PrinceBin +
    AuthTotal + # total parties with claims to sovereignty
    Schoeffen + #Castellan + Schlthss + # local rule
    VfgTotal + VrtTotPrev + # prior persecutions
    J_dev_sum + # Jewish community
    Mkt2 + Frgn + Mint + # commercial development
  # settlement random effects
  (1|PlaceID/ObsID) + Period15yrs)

# 20 year ---
x.segdistance20 <- as.formula(Vrtrb ~  
  # diffusion measures:
    Expl_sum20_Episc + # Church-led
    Expl_sum20_Episc * AuthEpisc + # within Church
    const20_Episc + # with meaning-based spatial trend - Dif4
    Expl_sum20_FreeImp + # autonomous-led
    Expl_sum20_FreeImp * AuthFreeImp + # within autonomous
    const20_FreeImp + # with meaning-based spatial trend - Dif4
    dist_Constance + # with universal spatial trend
  # local factors:
    AuthFreeImp + AuthEpisc + PrinceBin +
    AuthTotal + # total parties with claims to sovereignty
    Schoeffen + #Castellan + Schlthss + # local rule
    VfgTotal + VrtTotPrev + # prior persecutions
    J_dev_sum + # Jewish community
    Mkt2 + Frgn + Mint + # commercial development
  # settlement random effects
  (1|PlaceID/ObsID) + Period15yrs)
```
```{r models_output_yearlag, eval = TRUE, echo = FALSE}
fit_segdistance_5yr <- stan_glmer(x.segdistance5,
                              data = model_data_scaled,
                              family = binomial(link = "logit"),
                              prior = t_prior,
                              prior_intercept = half_t_prior,
                              #QR=FALSE, sparse=TRUE,
                              iter=5000, cores=parallel::detectCores(),
                              chains=5, seed=1418,
                              control = list(adapt_delta = 0.99)
                              )
saveRDS(fit_segdistance_5yr, here::here("Diffusion", "out_files", "fit_segdistance_5yr.rds"))
# fit_segdistance_5yr <- readRDS(here::here("Diffusion", "out_files", "fit_segdistance_5yr.rds"))


fit_segdistance_20yr <- stan_glmer(x.segdistance20,
                              data = model_data_20_scaled,
                              family = binomial(link = "logit"),
                              prior = t_prior,
                              prior_intercept = half_t_prior,
                              #QR=FALSE, sparse=TRUE,
                              iter=5000, cores=parallel::detectCores(),
                              chains=5, seed=1418,
                              control = list(adapt_delta = 0.99)
                              )
saveRDS(fit_segdistance_20yr, here::here("Diffusion", "out_files", "fit_segdistance_20yr.rds"))
# fit_segdistance_20yr <- readRDS(here::here("Diffusion", "out_files", "fit_segdistance_20yr.rds"))


m7_table_raw <- tidy.stansummary(fit_segdistance_5yr, cred.levels = c(0.055, 0.5, 0.945), digs = 2)
m8_table_raw <- tidy.stansummary(fit_segdistance_20yr, cred.levels = c(0.055, 0.5, 0.945), digs = 2)

var.order.m7 <- c("dist_Constance", "Expl_sum5_Episc", "Expl_sum5_FreeImp", 
  "Expl_sum5_Episc:AuthEpisc", "Expl_sum5_FreeImp:AuthFreeImp", 
  "const5_Episc", "const5_FreeImp", 
  "AuthEpisc","AuthFreeImp", "PrinceBin", 
  "AuthTotal", "Schoeffen", 
  "Mkt2", "Frgn", "Mint",
  "VfgTotal", "VrtTotPrev", "J_dev_sum",
  "Period15yrs(1400,1415]", "Period15yrs(1415,1430]", "Period15yrs(1430,1445]",
  "Period15yrs(1445,1460]", "Period15yrs(1460,1475]", "Period15yrs(1475,1490]",
  "Period15yrs(1490,1505]", "Period15yrs(1505,1520]",
  "(Intercept)", "Sigma[PlaceID:(Intercept),(Intercept)]", "mean_PPD", "log-posterior")

var.order.m8 <- c("dist_Constance", "Expl_sum20_Episc", "Expl_sum20_FreeImp", 
  "Expl_sum20_Episc:AuthEpisc", "Expl_sum20_FreeImp:AuthFreeImp", 
  "const20_Episc", "const20_FreeImp",
  "AuthEpisc","AuthFreeImp", "PrinceBin", 
  "AuthTotal", "Schoeffen", 
  "Mkt2", "Frgn", "Mint",
  "VfgTotal", "VrtTotPrev", "J_dev_sum",
  "Period15yrs(1400,1415]", "Period15yrs(1415,1430]", "Period15yrs(1430,1445]",
  "Period15yrs(1445,1460]", "Period15yrs(1460,1475]", "Period15yrs(1475,1490]",
  "Period15yrs(1490,1505]", "Period15yrs(1505,1520]",
  "(Intercept)", "Sigma[PlaceID:(Intercept),(Intercept)]", "mean_PPD", "log-posterior")


m7_table <- m7_table_raw %>%
  dplyr::filter(terms %in% var.order.m7) %>%
  slice(match(var.order.m7, terms)) %>%
  mutate(terms=label.order.m5)
m7_table <- rbind(m7_table, c("Nested City:Period Random effects", "Y",c(rep("",ncol(m7_table)-2))))

m8_table <- m8_table_raw %>%
  dplyr::filter(terms %in% var.order.m8) %>%
  slice(match(var.order.m8, terms)) %>%
  mutate(terms=label.order.m5)
m8_table <- rbind(m8_table, c("Nested City:Period Random effects", "Y",c(rep("",ncol(m8_table)-2))))

write_csv(m7_table, here::here("Diffusion","out_tables","table_b3_model7.csv"))
write_csv(m8_table, here::here("Diffusion","out_tables","table_b3_model8.csv"))

```

  Table: Model 7 (5 year lag)  

```{r table_b3_model7_5yr, eval=TRUE, results='asis', echo=FALSE}
stargazer(m7_table, type = "html", summary = FALSE, align=TRUE)
```
  
  Table: Model 8 (20 year lag)  
  
```{r table_b3_model8_20yr, eval=TRUE, results='asis', echo=FALSE}
stargazer(m8_table, type = "html", summary = FALSE, align=TRUE)
```
  
  
### Climate - re-run for appendix

Looking at alternatives and sensitivity - what about AJK (2017)'s result that climate and agricultural suitability were important for persecutions?  

```{r models_setup_climate, eval=TRUE, echo=FALSE}

# fit_segdistance, fit_hybrid, fit_nhood, fit_peers
# I don't need to re-run with temp for all of the models, just the ones that are actually useful: 

x.segdistance.clm <- as.formula(Vrtrb ~  
    temp_lag1 +
  # diffusion measures:
    Expl_sum10_Episc + # Church-led
    Expl_sum10_Episc * AuthEpisc + # within Church
    const10_Episc + # with meaning-based spatial trend - Dif4
    Expl_sum10_FreeImp + # autonomous-led
    Expl_sum10_FreeImp * AuthFreeImp + # within autonomous
    const10_FreeImp + # with meaning-based spatial trend - Dif4
    dist_Constance + # with universal spatial trend
  # local factors:
    AuthFreeImp + AuthEpisc + PrinceBin +
    AuthTotal + # total parties with claims to sovereignty
    Schoeffen + #Castellan + Schlthss + # local rule
    VfgTotal + VrtTotPrev + # prior persecutions
    J_dev_sum + # Jewish community
    Mkt2 + Frgn + Mint + # commercial development
  # settlement random effects
  (1|PlaceID/ObsID) + Period15yrs)

# Pattern 6: increase from 1418 among autonomous cities (Pattern 6) plus suppression nearby – temporal, categorical, spatial
# Dif2: Expulsions/Distance (10-year)

x.hybrid.clm <- as.formula(Vrtrb ~  
    temp_lag1 +
  # diffusion measures:
    postConstance + # historical change
    dist_Constance + # universal spatial trend
    Expl_sum10_FreeImp + # autonomous-led
    dist10_rivroad + # local spatial trend
  # local factors:
    AuthFreeImp + AuthEpisc + PrinceBin +
    AuthTotal + # total parties with claims to sovereignty
    Schoeffen + #Castellan + Schlthss + # local rule
    VfgTotal + VrtTotPrev + # prior persecutions
    J_dev_sum + # Jewish community
    Mkt2 + Frgn + Mint + # commercial development
  # settlement random effects
  (1|PlaceID/ObsID) + Period15yrs)

```
```{r models_output_climate, echo = FALSE, eval=TRUE}

# fit_segdistance_clm, fit_hybrid_clm, fit_peers_clm

fit_segdistance_clm <- stan_glmer(x.segdistance.clm,
                              data = model_data_scaled,
                              family = binomial(link = "logit"),
                              prior = t_prior,
                              prior_intercept = half_t_prior,
                              #QR=FALSE, sparse=TRUE,
                              iter=5000, cores=parallel::detectCores(),
                              chains=5, seed=1418,
                              control = list(adapt_delta = 0.99)
                              )
saveRDS(fit_segdistance_clm, here::here("Diffusion", "out_files", "fit_segdistance_clm.rds"))
# fit_segdistance_clm <- readRDS(here::here("Diffusion", "out_files", "fit_segdistance_clm.rds"))

fit_hybrid_clm <- stan_glmer(x.hybrid.clm,
                              data = model_data_scaled,
                              family = binomial(link = "logit"),
                              prior = t_prior,
                              prior_intercept = half_t_prior,
                              #QR=FALSE, sparse=TRUE,
                              iter=5000, cores=parallel::detectCores(),
                              chains=5, seed=1418,
                              control = list(adapt_delta = 0.99)
                              )
saveRDS(fit_hybrid_clm, here::here("Diffusion", "out_files", "fit_hybrid_clm.rds"))
# fit_hybrid_clm <- readRDS(here::here("Diffusion", "out_files", "fit_hybrid_clm.rds"))


m9_table_raw <- tidy.stansummary(fit_segdistance_clm, cred.levels = c(0.055, 0.5, 0.945), digs = 2)
m9_table <- m9_table_raw %>%
  dplyr::filter(terms %in% c(c("temp_lag1"), var.order.m5)) %>%
  slice(match(c("temp_lag1", var.order.m5), terms)) %>%
  mutate(terms=c("temp_lag1", label.order.m5))
m9_table <- rbind(m9_table, c("Nested City:Period Random effects", "Y",c(rep("",ncol(m9_table)-2))))

m10_table_raw <- tidy.stansummary(fit_hybrid_clm, cred.levels = c(0.055, 0.5, 0.945), digs = 2)
m10_table <- m10_table_raw %>%
  dplyr::filter(terms %in% c(c("temp_lag1"), var.order.m6)) %>%
  slice(match(c("temp_lag1", var.order.m6), terms)) %>%
  mutate(terms=c("temp_lag1", label.order.m6))
m10_table <- rbind(m10_table, c("Nested City:Period Random effects", "Y",c(rep("",ncol(m10_table)-2))))

write_csv(m9_table, here::here("Diffusion","out_tables","table_b1_model9.csv"))
write_csv(m10_table, here::here("Diffusion","out_tables","table_b1_model10.csv"))

```
```{r table_b1_model9_clm, eval=TRUE, results='asis', echo=FALSE}
stargazer(m9_table, type = "html", summary = FALSE, align=TRUE)
```
```{r table_b1_model10_clm, eval=TRUE, results='asis', echo=FALSE}
stargazer(m10_table, type = "html", summary = FALSE, align=TRUE)
```
 
```{r prediction_plots_areas_clm, eval = FALSE, echo = FALSE, fig.height = 4, fig.width = 14}
# plot posterior medians with credibility intervals
# binary: shows moving 0-> 1
# scaled: shows moving mean->mean+1/.5 (=mean+2)

posterior_m9 <- as.array(fit_segdistance_clm)
posterior_m10 <- as.array(fit_hybrid_clm)

p9 <- mcmc_areas(posterior_m9, pars = rev(c("temp_lag1",fig.var.order.m4),
                 prob = 0.5, prob_outer = 0.89,
                 point_est = "median", transformations="plogis") +
  vline_at(0.5, linetype = 3, size = 0.25) +
  scale_y_discrete() +
  scale_y_discrete(labels=rev(c("Temperature",fig.label.order.m5)) +
  scale_x_continuous() +
  coord_cartesian(xlim = c(0.0005, 0.97)) +
  scale_x_continuous(breaks = c(.167,.33,.5,.665,.83),
                     labels=c(round(0.33*mean(model_data$Vrtrb),4),
                              round(0.66*mean(model_data$Vrtrb),4),
                              round(mean(model_data$Vrtrb),4),
                              round(1.33*mean(model_data$Vrtrb),4),
                              round(1.66*mean(model_data$Vrtrb),4))) +
  labs(title = "Model 9") + mytheme

p10 <- mcmc_areas(posterior_m10, pars = rev(c("temp_lag1",fig.var.order.m5),
                 prob = 0.5, prob_outer = 0.89,
                 point_est = "median", transformations="plogis") +
  vline_at(0.5, linetype = 3, size = 0.25) +
  scale_y_discrete() +
  scale_y_discrete(labels=rev(c("Temperature",fig.label.order.m6)) +
  scale_x_continuous() +
  coord_cartesian(xlim = c(0.0005, 0.97)) +
  scale_x_continuous(breaks = c(.167,.33,.5,.665,.83),
                     labels=c(round(0.33*mean(model_data$Vrtrb),4),
                              round(0.66*mean(model_data$Vrtrb),4),
                              round(mean(model_data$Vrtrb),4),
                              round(1.33*mean(model_data$Vrtrb),4),
                              round(1.66*mean(model_data$Vrtrb),4))) +
  labs(title = "Model 10") + mytheme

p.compare_posteriors_clm <- cowplot::plot_grid(p9, p10,
                  ncol = 2, align = 'hv', 
                  labels = c(""),
                  hjust = -1)

# tiff(here::here("Diffusion", "out_figures", "FigsA2_areas_plots-1_bw.tiff"), width = 14, height = 10, units = 'in', res = 300)
plot(p.compare_posteriors_clm)
# dev.off()

```


### Adjust control measurement
``` {r models_setup_controls, eval=FALSE, echo=FALSE}

x.segdistance.cont <- as.formula(Vrtrb ~
    Expl_sum10_Episc + # Church-led
    Expl_sum10_Episc * AuthEpisc + # within Church
    const10_Episc + # with meaning-based spatial trend - Dif4
    Expl_sum10_FreeImp + # autonomous-led
    Expl_sum10_FreeImp * AuthFreeImp + # within autonomous
    const10_FreeImp + # with meaning-based spatial trend - Dif4
    dist_Constance + # with universal spatial trend
  # local factors:
    AuthFreeImp + AuthEpisc + PrinceBin +
    AuthTotal + # total parties with claims to sovereignty
    Schoeffen + #Castellan + Schlthss + # local rule
    VfgTotal + VrtTotPrev + # prior persecutions
    J_dev_bin + # Jewish community
    Mkt_bin + Frgn + Mint + # commercial development
  # settlement random effects
  (1|PlaceID/ObsID) + Period15yrs)
```

```{r models_output_controls, echo = FALSE, eval=FALSE}
fit_segdistance_cont <- stan_glmer(x.segdistance.cont,
                              data = model_data_scaled,
                              family = binomial(link = "logit"),
                              prior = t_prior,
                              prior_intercept = half_t_prior,
                              #QR=FALSE, sparse=TRUE,
                              iter=5000, cores=parallel::detectCores(),
                              chains=5, seed=1418,
                              control = list(adapt_delta = 0.99)
                              )
saveRDS(fit_segdistance_cont, here::here("Diffusion", "out_files", "fit_segdistance_cont.rds"))
# fit_segdistance_cont <- readRDS(here::here("Diffusion", "out_files", "fit_segdistance_cont.rds"))
```


## Model Details 

```{r table_c1_model_details, eval=TRUE, echo=TRUE}

sampler_param_summary(fit_learning, "divergent__", sum) # m1
sampler_param_summary(fit_nhood, "divergent__", sum) # m2
sampler_param_summary(fit_relchange, "divergent__", sum) # m3
sampler_param_summary(fit_segmented, "divergent__", sum) # m4
sampler_param_summary(fit_segdistance, "divergent__", sum) # m5
sampler_param_summary(fit_hybrid, "divergent__", sum) # m6
sampler_param_summary(fit_segdistance_5yr, "divergent__", sum) # m7
sampler_param_summary(fit_segdistance_20yr, "divergent__", sum) # m8
sampler_param_summary(fit_segdistance_clm, "divergent__", sum) # m9
sampler_param_summary(fit_hybrid_clm, "divergent__", sum) # m10
sampler_param_summary(fit_segdistance_cont, "divergent__", sum) # m11

```

## Extra
 
 
 Table of descriptives for scaled data, all years  
   
```{r table_aa_descriptives_scaled, eval=FALSE, results='asis', echo=FALSE}
stargazer(t(descr_scaled[,c(table_1_vars, table_2_vars, "temp_lag1")]), digits=2, type = "html", summary = FALSE, align=TRUE, out=here::here("Diffusion","out_tables","descriptives_scaled.htm"))
```


***
## R Session Info
```{r session_info}
sessionInfo()
```
